From b98b567c2501540ef4a9d586c26ab8271c6d1f0d Mon Sep 17 00:00:00 2001 From: ostannard Date: Thu, 12 Oct 2023 17:03:01 +0100 Subject: [PATCH 001/720] [ARM] Correctly handle .inst in IT and VPT blocks (#68902) Advance the IT and VPT block state when parsing the .inst directive, so that it is possible to use them to emit conditional instructions. If we don't do this, then a later instruction inside or just after the block will have a mis-matched condition, so be incorrectly reported as an error. --- .../lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 ++ llvm/test/MC/ARM/inst-directive-it-vpt.s | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 llvm/test/MC/ARM/inst-directive-it-vpt.s diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 590887b765d7f..373d5b59bca66 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -11981,6 +11981,8 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { } getTargetStreamer().emitInst(Value->getValue(), CurSuffix); + forwardITPosition(); + forwardVPTPosition(); return false; }; diff --git a/llvm/test/MC/ARM/inst-directive-it-vpt.s b/llvm/test/MC/ARM/inst-directive-it-vpt.s new file mode 100644 index 0000000000000..8550d720ed422 --- /dev/null +++ b/llvm/test/MC/ARM/inst-directive-it-vpt.s @@ -0,0 +1,26 @@ +// RUN: llvm-mc %s -triple armv8m.main -mattr=+mve -filetype asm -o - | FileCheck %s + + .thumb + +// CHECK: it eq +// CHECK: .inst.n 0x3001 +// CHECK: add.w r0, r0, #1 + it eq + .inst.n 0x3001 // addeq r0, #1 + add r0, #1 + +// CHECK: vpst +// CHECK: .inst.w 0xef220844 +// CHECK: vadd.i32 q0, q1, q2 + vpst + .inst.w 0xef220844 // vaddt.i32 q0, q1, q2 + vadd.i32 q0, q1, q2 + +// CHECK: ite eq +// CHECK: .inst.n 0x3001 +// CHECK: addne r0, #1 +// CHECK: add.w r0, r0, #1 + ite eq + .inst.n 0x3001 // addeq r0, #1 + addne r0, #1 + add r0, #1 From 
c136e722aa4b03209da48b641c6f413202cb0ff9 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Thu, 12 Oct 2023 12:20:40 -0400 Subject: [PATCH 002/720] [Remarks] Fix '-fpermissive'. NFC --- llvm/tools/llvm-remarkutil/RemarkCounter.cpp | 8 ++++---- llvm/tools/llvm-remarkutil/RemarkCounter.h | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp index b7cbebc0ca8e6..fa05f4fda95fb 100644 --- a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp +++ b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp @@ -167,7 +167,7 @@ Error ArgumentCounter::getAllMatchingArgumentsInRemark( std::optional Counter::getGroupByKey(const Remark &Remark) { - switch (GroupBy) { + switch (_GroupBy) { case GroupBy::PER_FUNCTION: return Remark.FunctionName.str(); case GroupBy::TOTAL: @@ -177,7 +177,7 @@ std::optional Counter::getGroupByKey(const Remark &Remark) { if (!Remark.Loc.has_value()) return std::nullopt; - if (GroupBy == GroupBy::PER_FUNCTION_WITH_DEBUG_LOC) + if (_GroupBy == GroupBy::PER_FUNCTION_WITH_DEBUG_LOC) return Remark.Loc->SourceFilePath.str() + ":" + Remark.FunctionName.str(); return Remark.Loc->SourceFilePath.str(); } @@ -213,7 +213,7 @@ Error ArgumentCounter::print(StringRef OutputFileName) { return MaybeOF.takeError(); auto OF = std::move(*MaybeOF); - OF->os() << groupByToStr(GroupBy) << ","; + OF->os() << groupByToStr(_GroupBy) << ","; unsigned Idx = 0; for (auto [Key, _] : ArgumentSetIdxMap) { OF->os() << Key; @@ -243,7 +243,7 @@ Error RemarkCounter::print(StringRef OutputFileName) { return MaybeOF.takeError(); auto OF = std::move(*MaybeOF); - OF->os() << groupByToStr(GroupBy) << "," + OF->os() << groupByToStr(_GroupBy) << "," << "Count\n"; for (auto [Key, Count] : CountedByRemarksMap) OF->os() << Key << "," << Count << "\n"; diff --git a/llvm/tools/llvm-remarkutil/RemarkCounter.h b/llvm/tools/llvm-remarkutil/RemarkCounter.h index aa9eaf698849c..89cd3f7388d07 100644 --- 
a/llvm/tools/llvm-remarkutil/RemarkCounter.h +++ b/llvm/tools/llvm-remarkutil/RemarkCounter.h @@ -110,9 +110,9 @@ inline Error checkRegex(const Regex &Regex) { /// Abstract counter class used to define the general required methods for /// counting a remark. struct Counter { - GroupBy GroupBy; + GroupBy _GroupBy; Counter(){}; - Counter(enum GroupBy GroupBy) : GroupBy(GroupBy) {} + Counter(enum GroupBy GroupBy) : _GroupBy(GroupBy) {} /// Obtain the field for collecting remark info based on how we are /// collecting. Remarks are grouped by FunctionName, Source, Source and /// Function or collect by file. @@ -161,7 +161,7 @@ struct ArgumentCounter : Counter { createArgumentCounter(enum GroupBy GroupBy, ArrayRef Arguments, StringRef Buffer, Filters &Filter) { ArgumentCounter AC; - AC.GroupBy = GroupBy; + AC._GroupBy = GroupBy; for (auto &Arg : Arguments) { if (Arg.IsRegex) { if (auto E = checkRegex(Arg.FilterRE)) From 7b12d8bf8a1ff1540e32345b045f813644708a71 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Thu, 12 Oct 2023 12:19:40 -0400 Subject: [PATCH 003/720] [clang][Tests] Fix shared build. 
NFC --- clang/unittests/AST/Interp/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/unittests/AST/Interp/CMakeLists.txt b/clang/unittests/AST/Interp/CMakeLists.txt index e8d41091af40c..8fa5c85064dbc 100644 --- a/clang/unittests/AST/Interp/CMakeLists.txt +++ b/clang/unittests/AST/Interp/CMakeLists.txt @@ -5,7 +5,10 @@ add_clang_unittest(InterpTests clang_target_link_libraries(InterpTests PRIVATE clangAST + clangASTMatchers clangBasic + clangFrontend + clangSerialization clangTooling ) From cff50072a0573515b16bae5047d0e3864b170f01 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 12 Oct 2023 12:36:19 -0400 Subject: [PATCH 004/720] [gn] port f445be9790f9 --- llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 303a6c29d7b91..0649daf46b927 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -473,6 +473,8 @@ static_library("builtins") { sources -= [ "fp_mode.c" ] sources += [ "aarch64/fp_mode.c", + "aarch64/sme-abi-init.c", + "aarch64/sme-abi.S", "cpu_model.c", ] if (current_os == "mingw") { From 0aacc2137a80c58f2db7304ac852057a7915fa70 Mon Sep 17 00:00:00 2001 From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> Date: Thu, 12 Oct 2023 09:42:12 -0700 Subject: [PATCH 005/720] [mlir][sparse] introduce sparse_tensor.reorder_coo operation (#68827) --- .../SparseTensor/IR/SparseTensorAttrDefs.td | 17 ++++++++-- .../SparseTensor/IR/SparseTensorOps.td | 32 ++++++++++++++++++- .../SparseTensor/IR/SparseTensorDialect.cpp | 27 ++++++++++++++++ mlir/test/Dialect/SparseTensor/fold.mlir | 13 ++++++++ mlir/test/Dialect/SparseTensor/invalid.mlir | 22 +++++++++++++ mlir/test/Dialect/SparseTensor/roundtrip.mlir | 14 ++++++++ 6 files changed, 121 insertions(+), 4 deletions(-) diff --git 
a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td index afd978c1c57eb..38c7200afb41f 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td @@ -134,7 +134,7 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding", level-coordinates. The dimension-expressions collectively define the inverse map, which only needs to be provided for elaborate cases where it cannot be inferred automatically. - + Each dimension could also have an optional `SparseTensorDimSliceAttr`. Within the sparse storage format, we refer to indices that are stored explicitly as **coordinates** and offsets into the storage format as **positions**. @@ -237,10 +237,10 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding", }> ... tensor<20x30xf32, #BSR_explicit> ... - // ELL format. + // ELL format. // In the simple format for matrix, one array stores values and another // array stores column indices. The arrays have the same number of rows - // as the original matrix, but only have as many columns as + // as the original matrix, but only have as many columns as // the maximum number of nonzeros on a row of the original matrix. // There are many variants for ELL such as jagged diagonal scheme. // To implement ELL, map provides a notion of "counting a @@ -376,6 +376,9 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding", /// the null encoding (since dense-tensors are always all-dense). bool isAllDense() const; + /// Returns true if it is a sparse tensor encoding in COO format. + bool isCOO() const; + /// Returns true if every level is ordered. Also returns true for /// the null encoding (since dense-tensors are always all-ordered). 
bool isAllOrdered() const; @@ -468,6 +471,10 @@ def SparseTensorStorageSpecifierKindAttr def IsSparseTensorPred : CPred<"!!::mlir::sparse_tensor::getSparseTensorEncoding($_self)">; +def IsCOOPred + : CPred<"!!::mlir::sparse_tensor::getSparseTensorEncoding($_self) && " + " ::mlir::sparse_tensor::getSparseTensorEncoding($_self).isCOO()">; + def IsSparseTensorSlicePred : CPred<"!!::mlir::sparse_tensor::getSparseTensorEncoding($_self) && " " ::mlir::sparse_tensor::getSparseTensorEncoding($_self).isSlice()">; @@ -478,10 +485,14 @@ def IsSparseTensorSlicePred class SparseTensorOf allowedTypes> : TensorOf; +class COOSparseTensorOf allowedTypes> + : TensorOf; + class SparseTensorSliceOf allowedTypes> : TensorOf; def AnySparseTensor : SparseTensorOf<[AnyType]>; +def AnyCOOSparseTensor : COOSparseTensorOf<[AnyType]>; def AnySparseTensorSlice : SparseTensorSliceOf<[AnyType]>; class RankedSparseTensorOf allowedTypes> diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index 042ae9693f486..afbabb97eb71f 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -770,7 +770,7 @@ def SparseTensor_OutOp : SparseTensor_Op<"out", []>, } //===----------------------------------------------------------------------===// -// Sparse Tensor Sorting Operations. +// Sparse Tensor Sorting/Ordering Operations. 
//===----------------------------------------------------------------------===// def SparseTensor_SortOp : SparseTensor_Op<"sort">, @@ -809,6 +809,36 @@ def SparseTensor_SortOp : SparseTensor_Op<"sort">, let hasVerifier = 1; } +def SparseTensor_ReorderCOOOp : SparseTensor_Op<"reorder_coo", [Pure]>, + Arguments<(ins AnyCOOSparseTensor: $input_coo, + SparseTensorSortKindAttr:$algorithm)>, + Results<(outs AnyCOOSparseTensor: $result_coo)> { + let summary = "Reorder the input COO such that it has the the same order as " + "the output COO"; + let description = [{ + sparse_tensor.reorder_coo reorder input COO to the same order as specified by + the output format. E.g., reorder an unordered COO into an ordered one. + + The input and result COO tensor must have the same element type, position type and + coordinate type. At the moment, the operation also only supports ordering + input and result COO with the same dim2lvl map. + + Example: + + ```mlir + %res = sparse_tensor.reorder_coo quick_sort %coo : tensor to + tensor + + ``` + }]; + + let assemblyFormat = "$algorithm $input_coo attr-dict" + "`:` type($input_coo) `to` type($result_coo)"; + + let hasFolder = 1; + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // Sparse Tensor Syntax Operations. 
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 5b84d2158bc82..ef9d4fea68628 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -336,6 +336,10 @@ bool SparseTensorEncodingAttr::isAllDense() const { return !getImpl() || llvm::all_of(getLvlTypes(), isDenseDLT); } +bool SparseTensorEncodingAttr::isCOO() const { + return getImpl() && isCOOType(*this, 0, true); +} + bool SparseTensorEncodingAttr::isAllOrdered() const { return !getImpl() || llvm::all_of(getLvlTypes(), isOrderedDLT); } @@ -1417,6 +1421,29 @@ LogicalResult ForeachOp::verify() { return success(); } +OpFoldResult ReorderCOOOp::fold(FoldAdaptor adaptor) { + if (getSparseTensorEncoding(getInputCoo().getType()) == + getSparseTensorEncoding(getResultCoo().getType())) + return getInputCoo(); + + return {}; +} + +LogicalResult ReorderCOOOp::verify() { + SparseTensorType srcStt = getSparseTensorType(getInputCoo()); + SparseTensorType dstStt = getSparseTensorType(getResultCoo()); + + if (!srcStt.hasSameDimToLvl(dstStt)) + emitError("Unmatched dim2lvl map between input and result COO"); + + if (srcStt.getPosType() != dstStt.getPosType() || + srcStt.getCrdType() != dstStt.getCrdType() || + srcStt.getElementType() != dstStt.getElementType()) { + emitError("Unmatched storage format between input and result COO"); + } + return success(); +} + LogicalResult ReduceOp::verify() { Type inputType = getX().getType(); // Check correct number of block arguments and return type. 
diff --git a/mlir/test/Dialect/SparseTensor/fold.mlir b/mlir/test/Dialect/SparseTensor/fold.mlir index 089431f9e18e9..3dd1a629c129f 100644 --- a/mlir/test/Dialect/SparseTensor/fold.mlir +++ b/mlir/test/Dialect/SparseTensor/fold.mlir @@ -62,3 +62,16 @@ func.func @sparse_get_specifier_dce_fold(%arg0: !sparse_tensor.storage_specifier : !sparse_tensor.storage_specifier<#SparseVector> return %2 : index } + + + +#COO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)}> + +// CHECK-LABEL: func @sparse_reorder_coo( +// CHECK-SAME: %[[A:.*]]: tensor> +// CHECK-NOT: %[[R:.*]] = sparse_tensor.reorder_coo +// CHECK: return %[[A]] +func.func @sparse_reorder_coo(%arg0 : tensor) -> tensor { + %ret = sparse_tensor.reorder_coo quick_sort %arg0 : tensor to tensor + return %ret : tensor +} diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir index 2df4237efa0bb..805f3d161921c 100644 --- a/mlir/test/Dialect/SparseTensor/invalid.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid.mlir @@ -839,3 +839,25 @@ func.func @sparse_alloc_escapes(%arg0: index) -> tensor<10x?xf64, #CSR> { %0 = bufferization.alloc_tensor(%arg0) : tensor<10x?xf64, #CSR> return %0: tensor<10x?xf64, #CSR> } + +// ----- + +#UnorderedCOO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered))}> +#OrderedCOOPerm = #sparse_tensor.encoding<{map = (d0, d1) -> (d1 : compressed(nonunique), d0 : singleton)}> + +func.func @sparse_permuted_reorder_coo(%arg0 : tensor) -> tensor { + // expected-error@+1 {{Unmatched dim2lvl map between input and result COO}} + %ret = sparse_tensor.reorder_coo quick_sort %arg0 : tensor to tensor + return %ret : tensor +} + +// ----- + +#UnorderedCOO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered))}> +#OrderedCOO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique), d1 : 
singleton)}> + +func.func @sparse_permuted_reorder_coo(%arg0 : tensor) -> tensor { + // expected-error@+1 {{Unmatched storage format between input and result COO}} + %ret = sparse_tensor.reorder_coo quick_sort %arg0 : tensor to tensor + return %ret : tensor +} diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index 82267be34b938..cbc3bb824924c 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -633,3 +633,17 @@ func.func @sparse_sort_coo_stable(%arg0: index, %arg1: memref, %arg2: mem sparse_tensor.sort insertion_sort_stable %arg0, %arg1 jointly %arg2 {perm_map = #ID_MAP, ny = 1 : index}: memref jointly memref return %arg1, %arg2 : memref, memref } + +// ----- + +#UnorderedCOO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered))}> +#OrderedCOO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)}> + +// CHECK-LABEL: func @sparse_reorder_coo( +// CHECK-SAME: %[[A:.*]]: tensor> +// CHECK: %[[R:.*]] = sparse_tensor.reorder_coo quick_sort %[[A]] +// CHECK: return %[[R]] +func.func @sparse_reorder_coo(%arg0 : tensor) -> tensor { + %ret = sparse_tensor.reorder_coo quick_sort %arg0 : tensor to tensor + return %ret : tensor +} From b44b3494f60296db6aca38a14cab061d9b747a0a Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Thu, 12 Oct 2023 16:43:19 +0000 Subject: [PATCH 006/720] [mlir][ArmSVE] Avoid UBSAN issue with VectorType::Builder (NFC) This patch just avoids the underlying bug in VectorType::Builder, which currently has incorrect copy/move constructors. 
See https://lab.llvm.org/buildbot/#/builders/5/builds/37355 --- mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp index f54a26c27c2ac..92278c0d74d57 100644 --- a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp +++ b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp @@ -117,7 +117,7 @@ struct SvboolConversionOpLowering : public ConvertOpToLLVMPattern { auto extractOrInsertPosition = ArrayRef(index).drop_back(); auto sourceVector = rewriter.create( loc, source, extractOrInsertPosition); - auto convertedType = + VectorType convertedType = VectorType::Builder(llvm::cast(sourceVector.getType())) .setDim(0, resultType.getShape().back()); auto convertedVector = From 7dcb260bef9f7b6926b0711aad69f883443996e4 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 12 Oct 2023 13:28:35 -0400 Subject: [PATCH 007/720] [gn] port 0ce6255a5058 (HipStdPar) --- llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn | 1 + .../utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn | 1 + .../secondary/llvm/lib/Transforms/HipStdPar/BUILD.gn | 10 ++++++++++ 3 files changed, 12 insertions(+) create mode 100644 llvm/utils/gn/secondary/llvm/lib/Transforms/HipStdPar/BUILD.gn diff --git a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn index cba3bf6cd38c8..d98420100df01 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn @@ -9,6 +9,7 @@ static_library("Passes") { "//llvm/lib/Target", "//llvm/lib/Transforms/AggressiveInstCombine", "//llvm/lib/Transforms/Coroutines", + "//llvm/lib/Transforms/HipStdPar", "//llvm/lib/Transforms/IPO", "//llvm/lib/Transforms/InstCombine", "//llvm/lib/Transforms/Instrumentation", diff --git 
a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn index eb9df8ac230f9..1afff26bca027 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn @@ -120,6 +120,7 @@ static_library("LLVMAMDGPUCodeGen") { "//llvm/lib/Support", "//llvm/lib/Target", "//llvm/lib/TargetParser", + "//llvm/lib/Transforms/HipStdPar", "//llvm/lib/Transforms/IPO", "//llvm/lib/Transforms/Scalar", "//llvm/lib/Transforms/Utils", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/HipStdPar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/HipStdPar/BUILD.gn new file mode 100644 index 0000000000000..f564817957d58 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/HipStdPar/BUILD.gn @@ -0,0 +1,10 @@ +static_library("HipStdPar") { + output_name = "LLVMHipStdPar" + deps = [ + "//llvm/lib/Analysis", + "//llvm/lib/IR", + "//llvm/lib/Support", + "//llvm/lib/Transforms/Utils", + ] + sources = [ "HipStdPar.cpp" ] +} From 9b89b80dbafedd7c3f4b7895840c1d53cfda4b1e Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 12 Oct 2023 17:29:02 +0000 Subject: [PATCH 008/720] [gn build] Port 31c2cf113617 --- llvm/utils/gn/secondary/llvm/tools/llvm-remarkutil/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-remarkutil/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-remarkutil/BUILD.gn index e55e82ff6f1f6..920abdc65f0b7 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-remarkutil/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-remarkutil/BUILD.gn @@ -9,6 +9,7 @@ executable("llvm-remarkutil") { sources = [ "RemarkConvert.cpp", "RemarkCount.cpp", + "RemarkCounter.cpp", "RemarkSizeDiff.cpp", "RemarkUtil.cpp", "RemarkUtilHelpers.cpp", From b56488c8790a8fc3cd0fc97c74999b54afcd9176 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 12 Oct 2023 10:53:33 -0700 Subject: [PATCH 009/720] [libc++] 
Improve the output of the generated-output CI job (#68903) The step that checked for ignore_format.txt being consistent with the tree wouldn't print any explicit diagnostic when failing, which led to confusion. After this patch, an explicit diagnostic will be printed by the job along with the required diff to ignore_format.txt. --- libcxx/utils/ci/run-buildbot | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index a71318123db3b..b5c48568c995e 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -209,6 +209,8 @@ check-generated-output) clean generate-cmake + set +x # Printing all the commands below just creates extremely confusing output + # Reject patches that forgot to re-run the generator scripts. echo "+++ Making sure the generator scripts were run" ${NINJA} -vC "${BUILD_DIR}" libcxx-generate-files @@ -222,20 +224,23 @@ check-generated-output) false fi + echo "+++ Making sure libcxx/utils/data/ignore_format.txt was updated appropriately" + cp ${MONOREPO_ROOT}/libcxx/utils/data/ignore_format.txt ${BUILD_DIR}/before.txt ${MONOREPO_ROOT}/libcxx/utils/generate_ignore_format.sh - git diff | tee ${BUILD_DIR}/generated_output.patch - git ls-files -o --exclude-standard | tee ${BUILD_DIR}/generated_output.status - ! grep -q '^--- a' ${BUILD_DIR}/generated_output.patch || false - if [ -s ${BUILD_DIR}/generated_output.status ]; then + diff ${BUILD_DIR}/before.txt ${MONOREPO_ROOT}/libcxx/utils/data/ignore_format.txt | tee ${BUILD_DIR}/ignore_format.diff || true + if [ -s ${BUILD_DIR}/ignore_format.diff ]; then echo "It looks like the list of not formatted files has changed." echo "If a file is now properly formatted with clang-format, remove the file name from " echo "libcxx/utils/data/ignore_format.txt. Otherwise you have to fix the" - echo "formatting of some of the changed files." + echo "formatting of some of the changed files. 
The diff above represents the " + echo "changes that would be needed to ignore_format.txt to keep it representative " + echo "of which files are mis-formatted in the project." false fi # Reject patches that introduce non-ASCII characters or hard tabs. # Depends on LC_COLLATE set at the top of this script. + set -x ! grep -rn '[^ -~]' libcxx/include libcxx/src libcxx/test libcxx/benchmarks \ --exclude '*.dat' \ --exclude '*unicode*.cpp' \ From 4c6cba31aaaa767cdb7f83ec4ca0eab9b6eae127 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Thu, 12 Oct 2023 21:54:07 +0400 Subject: [PATCH 010/720] [clang][NFC] Specify Type and ExtQuals as having 16-byte alignment (#68377) While working on LLDB visualizer for `QualType`, I stumbled upon `Type` and `ExtQuals` defined with `alignas(8)`. Such alignment leaves only 3 lower bits available for pointer tagging, whereas `QualType` requires 4 (3 qualifiers + discriminator between `Type *` and `ExtQuals *`). Turns out `Type` and its derived classes are allocated with `TypeAlignment == 16` passed to `Allocate()`. So I'm removing misleading `alignas(8)` and fixing corresponding static asserts. Since they are already allocated with 16-byte alignment, this is a non-functional change. --- clang/include/clang/AST/Type.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index a78d8f60462b2..3e7e4f4f75b58 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -1482,7 +1482,8 @@ class ExtQualsTypeCommonBase { /// in three low bits on the QualType pointer; a fourth bit records whether /// the pointer is an ExtQuals node. The extended qualifiers (address spaces, /// Objective-C GC attributes) are much more rare. 
-class ExtQuals : public ExtQualsTypeCommonBase, public llvm::FoldingSetNode { +class alignas(TypeAlignment) ExtQuals : public ExtQualsTypeCommonBase, + public llvm::FoldingSetNode { // NOTE: changing the fast qualifiers should be straightforward as // long as you don't make 'const' non-fast. // 1. Qualifiers: @@ -1594,7 +1595,7 @@ enum class AutoTypeKeyword { /// /// Types, once created, are immutable. /// -class alignas(8) Type : public ExtQualsTypeCommonBase { +class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { public: enum TypeClass { #define TYPE(Class, Base) Class, @@ -1982,9 +1983,10 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { Type(TypeClass tc, QualType canon, TypeDependence Dependence) : ExtQualsTypeCommonBase(this, canon.isNull() ? QualType(this_(), 0) : canon) { - static_assert(sizeof(*this) <= 8 + sizeof(ExtQualsTypeCommonBase), + static_assert(sizeof(*this) <= + alignof(decltype(*this)) + sizeof(ExtQualsTypeCommonBase), "changing bitfields changed sizeof(Type)!"); - static_assert(alignof(decltype(*this)) % sizeof(void *) == 0, + static_assert(alignof(decltype(*this)) % TypeAlignment == 0, "Insufficient alignment!"); TypeBits.TC = tc; TypeBits.Dependence = static_cast(Dependence); @@ -5348,7 +5350,7 @@ class DeducedType : public Type { /// Represents a C++11 auto or C++14 decltype(auto) type, possibly constrained /// by a type-constraint. -class alignas(8) AutoType : public DeducedType, public llvm::FoldingSetNode { +class AutoType : public DeducedType, public llvm::FoldingSetNode { friend class ASTContext; // ASTContext creates these ConceptDecl *TypeConstraintConcept; @@ -5456,9 +5458,7 @@ class DeducedTemplateSpecializationType : public DeducedType, /// TemplateArguments, followed by a QualType representing the /// non-canonical aliased type when the template is a type alias /// template. 
-class alignas(8) TemplateSpecializationType - : public Type, - public llvm::FoldingSetNode { +class TemplateSpecializationType : public Type, public llvm::FoldingSetNode { friend class ASTContext; // ASTContext creates these /// The name of the template being specialized. This is @@ -5872,9 +5872,8 @@ class DependentNameType : public TypeWithKeyword, public llvm::FoldingSetNode { /// Represents a template specialization type whose template cannot be /// resolved, e.g. /// A::template B -class alignas(8) DependentTemplateSpecializationType - : public TypeWithKeyword, - public llvm::FoldingSetNode { +class DependentTemplateSpecializationType : public TypeWithKeyword, + public llvm::FoldingSetNode { friend class ASTContext; // ASTContext creates these /// The nested name specifier containing the qualifier. From dd0f642e6ec5049ccabe3f462cc427ffe213829b Mon Sep 17 00:00:00 2001 From: vabridgers <58314289+vabridgers@users.noreply.github.com> Date: Thu, 12 Oct 2023 13:14:20 -0500 Subject: [PATCH 011/720] [Sema] Add check for bitfield assignments to larger integral types (#68276) We noticed that clang does not check for bitfield assignment widths, while gcc does check this. gcc produced a warning like so for it's -Wconversion flag: ``` $ gcc -Wconversion -c test.c test.c: In function 'foo': test.c:10:15: warning: conversion from 'int' to 'signed char:7' may change value [-Wconversion] 10 | vxx.bf = x; // no warning | ^ ``` This change simply adds this check for integral types under the -Wbitfield-conversion compiler option. 
--- clang/docs/ReleaseNotes.rst | 3 ++ clang/include/clang/Basic/DiagnosticGroups.td | 2 ++ .../clang/Basic/DiagnosticSemaKinds.td | 3 ++ clang/lib/Sema/SemaChecking.cpp | 13 ++++++- clang/test/SemaCXX/bitfield-width.c | 34 +++++++++++++++++++ 5 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 clang/test/SemaCXX/bitfield-width.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2d918967e7f0b..31969201a1cac 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -185,6 +185,9 @@ New Compiler Flags the preprocessed text to the output. This can greatly reduce the size of the preprocessed output, which can be helpful when trying to reduce a test case. +* ``-Wbitfield-conversion`` was added to detect assignments of integral + types to a bitfield that may change the value. + Deprecated Compiler Flags ------------------------- diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 0b09c00219184..674eb9f4ef2e7 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -53,6 +53,7 @@ def SingleBitBitFieldConstantConversion : def BitFieldConstantConversion : DiagGroup<"bitfield-constant-conversion", [SingleBitBitFieldConstantConversion]>; def BitFieldEnumConversion : DiagGroup<"bitfield-enum-conversion">; +def BitFieldConversion : DiagGroup<"bitfield-conversion">; def BitFieldWidth : DiagGroup<"bitfield-width">; def CompoundTokenSplitByMacro : DiagGroup<"compound-token-split-by-macro">; def CompoundTokenSplitBySpace : DiagGroup<"compound-token-split-by-space">; @@ -933,6 +934,7 @@ def Conversion : DiagGroup<"conversion", ConstantConversion, EnumConversion, BitFieldEnumConversion, + BitFieldConversion, FloatConversion, Shorten64To32, IntConversion, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c1a6e3831127e..ab7fe881976aa 100644 
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6171,6 +6171,9 @@ def warn_signed_bitfield_enum_conversion : Warning< "signed bit-field %0 needs an extra bit to represent the largest positive " "enumerators of %1">, InGroup, DefaultIgnore; +def warn_bitfield_too_small_for_integral_type : Warning< + "conversion from %2 (%3 bits) to bit-field %0 (%1 bits) may change value">, + InGroup, DefaultIgnore; def note_change_bitfield_sign : Note< "consider making the bitfield type %select{unsigned|signed}0">; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 2594a8f97f7d9..1b2f8cf296d16 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -14298,6 +14298,18 @@ static bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init, S.Diag(WidthExpr->getExprLoc(), diag::note_widen_bitfield) << BitsNeeded << ED << WidthExpr->getSourceRange(); } + } else if (OriginalInit->getType()->isIntegralType(S.Context)) { + IntRange LikelySourceRange = + GetExprRange(S.Context, Init, S.isConstantEvaluatedContext(), + /*Approximate=*/true); + + if (LikelySourceRange.Width > FieldWidth) { + Expr *WidthExpr = Bitfield->getBitWidth(); + S.Diag(InitLoc, diag::warn_bitfield_too_small_for_integral_type) + << Bitfield << FieldWidth << OriginalInit->getType() + << LikelySourceRange.Width; + S.Diag(WidthExpr->getExprLoc(), diag::note_declared_at); + } } return false; @@ -15195,7 +15207,6 @@ static void CheckImplicitConversion(Sema &S, Expr *E, QualType T, if (LikelySourceRange.Width > TargetRange.Width) { // If the source is a constant, use a default-on diagnostic. - // TODO: this should happen for bitfield stores, too. 
Expr::EvalResult Result; if (E->EvaluateAsInt(Result, S.Context, Expr::SE_AllowSideEffects, S.isConstantEvaluatedContext())) { diff --git a/clang/test/SemaCXX/bitfield-width.c b/clang/test/SemaCXX/bitfield-width.c new file mode 100644 index 0000000000000..8219054b959e5 --- /dev/null +++ b/clang/test/SemaCXX/bitfield-width.c @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -Wconversion -fsyntax-only -verify %s +// RUN: %clang_cc1 -Wbitfield-conversion -fsyntax-only -verify %s + +typedef struct _xx { + int bf:9; // expected-note 4{{declared here}} + } xx, *pxx; + + xx vxx; + + void foo1(int x) { + vxx.bf = x; // expected-warning{{conversion from 'int' (32 bits) to bit-field 'bf' (9 bits) may change value}} + } + void foo2(short x) { + vxx.bf = x; // expected-warning{{conversion from 'short' (16 bits) to bit-field 'bf' (9 bits) may change value}} + } + void foo3(char x) { + vxx.bf = x; // no warning expected + } + void foo5(void * x) { + vxx.bf = (int)x; // expected-warning{{cast to smaller integer type 'int' from 'void *'}} + // expected-warning@-1{{conversion from 'int' (32 bits) to bit-field 'bf' (9 bits) may change value}} + } + void foo6(short x) { + vxx.bf = 0xff & x; // no warning expected + } + void foo7(short x) { + vxx.bf = 0x1ff & x; // no warning expected + } + void foo8(short x) { + vxx.bf = 0x3ff & x; // expected-warning{{conversion from 'int' (10 bits) to bit-field 'bf' (9 bits) may change value}} + } + int fee(void) { + return 0; + } From 64d78d8b3cd09dff32c97fbefa56bcfc8b676406 Mon Sep 17 00:00:00 2001 From: Tom Yang Date: Thu, 12 Oct 2023 11:21:53 -0700 Subject: [PATCH 012/720] Add `target modules dump separate-debug-info` (#66035) Add a new command ``` target modules dump separate-debug-info [-j] [ [ [...]]] ``` or ``` image dump separate-debug-info [-j] [ [ [...]]] ``` (since `image` is an alias for `target modules`). This lists the separate debug info files and their current status (loaded or not loaded) for the specified modules. 
This diff implements this command for mach-O files with OSO and ELF files with dwo. Example dwo: ``` (lldb) image dump separate-debug-info Symbol file: /home/toyang/workspace/dwo-scratch/a.out Type: "dwo" Dwo ID Err Dwo Path ------------------ --- ----------------------------------------- 0x9a429da5abb6faae /home/toyang/workspace/scratch-dwo/a-main.dwo 0xbcc129959e76ff33 /home/toyang/workspace/scratch-dwo/a-foo.dwo (lldb) image dump separate-debug-info -j [ { "separate-debug-info-files": [ { "comp_dir": "/home/toyang/workspace/dwo-scratch", "dwo_id": 11115620165179865774, "dwo_name": "a-main.dwo", "loaded": true, "resolved_dwo_path": "/home/toyang/workspace/dwo-scratch/a-main.dwo" }, { "comp_dir": "/home/toyang/workspace/dwo-scratch", "dwo_id": 13601198072221073203, "dwo_name": "a-foo.dwo", "loaded": true, "resolved_dwo_path": "/home/toyang/workspace/dwo-scratch/a-foo.dwo" } ], "symfile": "/home/toyang/workspace/dwo-scratch/a.out", "type": "dwo" } ] ``` Example dwo with missing dwo: ``` (lldb) image dump separate-debug-info Symbol file: /home/toyang/workspace/dwo-scratch/a.out Type: "dwo" Dwo ID Err Dwo Path ------------------ --- ----------------------------------------- 0x9a429da5abb6faae E unable to locate .dwo debug file "/home/toyang/workspace/scratch-dwo/b.out-main.dwo" for skeleton DIE 0x0000000000000014 0xbcc129959e76ff33 E unable to locate .dwo debug file "/home/toyang/workspace/scratch-dwo/b.out-foo.dwo" for skeleton DIE 0x000000000000003c (lldb) image dump separate-debug-info -j [ { "separate-debug-info-files": [ { "comp_dir": "/home/toyang/workspace/dwo-scratch", "dwo_id": 11115620165179865774, "dwo_name": "a-main.dwo", "error": "unable to locate .dwo debug file \"/home/toyang/workspace/dwo-scratch/a-main.dwo\" for skeleton DIE 0x0000000000000014", "loaded": false }, { "comp_dir": "/home/toyang/workspace/dwo-scratch", "dwo_id": 13601198072221073203, "dwo_name": "a-foo.dwo", "error": "unable to locate .dwo debug file 
\"/home/toyang/workspace/dwo-scratch/a-foo.dwo\" for skeleton DIE 0x000000000000003c", "loaded": false } ], "symfile": "/home/toyang/workspace/dwo-scratch/a.out", "type": "dwo" } ] ``` Example output with dwp: ``` (lldb) image dump separate-debug-info Symbol file: /home/toyang/workspace/dwo-scratch/a.out Type: "dwo" Dwo ID Err Dwo Path ------------------ --- ----------------------------------------- 0x9a429da5abb6faae /home/toyang/workspace/dwo-scratch/a.out.dwp(a-main.dwo) 0xbcc129959e76ff33 /home/toyang/workspace/dwo-scratch/a.out.dwp(a-foo.dwo) (lldb) image dump separate-debug-info -j [ { "separate-debug-info-files": [ { "comp_dir": "/home/toyang/workspace/dwo-scratch", "dwo_id": 11115620165179865774, "dwo_name": "a-main.dwo", "loaded": true, "resolved_dwo_path": "/home/toyang/workspace/dwo-scratch/a.out.dwp" }, { "comp_dir": "/home/toyang/workspace/dwo-scratch", "dwo_id": 13601198072221073203, "dwo_name": "a-foo.dwo", "loaded": true, "resolved_dwo_path": "/home/toyang/workspace/dwo-scratch/a.out.dwp" } ], "symfile": "/home/toyang/workspace/dwo-scratch/a.out", "type": "dwo" } ] ``` Example oso on my Mac: ``` (lldb) image dump separate-debug-info Symbol file: /Users/toyang/workspace/scratch/a.out Type: "oso" Mod Time Err Oso Path ------------------ --- --------------------- 0x0000000064e64868 /Users/toyang/workspace/scratch/foo.a(foo.o) 0x0000000064e64868 /Users/toyang/workspace/scratch/foo.a(main.o) (lldb) image dump separate-debug-info -j [ { "separate-debug-info-files": [ { "loaded": true, "oso_mod_time": 1692813416, "oso_path": "/Users/toyang/workspace/scratch/foo.a(foo.o)", "so_file": "/Users/toyang/workspace/scratch/foo.cpp" }, { "loaded": true, "oso_mod_time": 1692813416, "oso_path": "/Users/toyang/workspace/scratch/foo.a(main.o)", "so_file": "/Users/toyang/workspace/scratch/main.cpp" } ], "symfile": "/Users/toyang/workspace/scratch/a.out", "type": "oso" } ] ``` Test Plan: Tested on Mac OS and Linux. 
``` lldb-dotest -p TestDumpDwo lldb-dotest -p TestDumpOso ``` --------- Co-authored-by: Tom Yang --- lldb/include/lldb/Symbol/SymbolFile.h | 13 + lldb/source/Commands/CommandObjectTarget.cpp | 260 +++++++++++++++++- lldb/source/Commands/Options.td | 5 + .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 71 ++++- .../SymbolFile/DWARF/SymbolFileDWARF.h | 5 + .../DWARF/SymbolFileDWARFDebugMap.cpp | 39 ++- .../DWARF/SymbolFileDWARFDebugMap.h | 5 + lldb/source/Symbol/SymbolFile.cpp | 1 + .../dump-separate-debug-info/dwo/Makefile | 4 + .../dwo/TestDumpDwo.py | 122 ++++++++ .../dump-separate-debug-info/dwo/foo.cpp | 3 + .../target/dump-separate-debug-info/dwo/foo.h | 6 + .../dump-separate-debug-info/dwo/main.cpp | 3 + .../dump-separate-debug-info/oso/Makefile | 3 + .../oso/TestDumpOso.py | 120 ++++++++ .../dump-separate-debug-info/oso/foo.cpp | 3 + .../target/dump-separate-debug-info/oso/foo.h | 6 + .../dump-separate-debug-info/oso/main.cpp | 3 + 18 files changed, 667 insertions(+), 5 deletions(-) create mode 100644 lldb/test/API/commands/target/dump-separate-debug-info/dwo/Makefile create mode 100644 lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py create mode 100644 lldb/test/API/commands/target/dump-separate-debug-info/dwo/foo.cpp create mode 100644 lldb/test/API/commands/target/dump-separate-debug-info/dwo/foo.h create mode 100644 lldb/test/API/commands/target/dump-separate-debug-info/dwo/main.cpp create mode 100644 lldb/test/API/commands/target/dump-separate-debug-info/oso/Makefile create mode 100644 lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py create mode 100644 lldb/test/API/commands/target/dump-separate-debug-info/oso/foo.cpp create mode 100644 lldb/test/API/commands/target/dump-separate-debug-info/oso/foo.h create mode 100644 lldb/test/API/commands/target/dump-separate-debug-info/oso/main.cpp diff --git a/lldb/include/lldb/Symbol/SymbolFile.h b/lldb/include/lldb/Symbol/SymbolFile.h index 8de752816cf94..512dd9acb86db 
100644 --- a/lldb/include/lldb/Symbol/SymbolFile.h +++ b/lldb/include/lldb/Symbol/SymbolFile.h @@ -22,6 +22,7 @@ #include "lldb/Symbol/TypeList.h" #include "lldb/Symbol/TypeSystem.h" #include "lldb/Target/Statistics.h" +#include "lldb/Utility/StructuredData.h" #include "lldb/Utility/XcodeSDK.h" #include "lldb/lldb-private.h" #include "llvm/ADT/DenseSet.h" @@ -434,6 +435,18 @@ class SymbolFile : public PluginInterface { virtual bool GetDebugInfoHadFrameVariableErrors() const = 0; virtual void SetDebugInfoHadFrameVariableErrors() = 0; + /// Return true if separate debug info files are supported and this function + /// succeeded, false otherwise. + /// + /// \param[out] d + /// If this function succeeded, then this will be a dictionary that + /// contains the keys "type", "symfile", and "separate-debug-info-files". + /// "type" can be used to assume the structure of each object in + /// "separate-debug-info-files". + virtual bool GetSeparateDebugInfo(StructuredData::Dictionary &d) { + return false; + }; + virtual lldb::TypeSP MakeType(lldb::user_id_t uid, ConstString name, std::optional byte_size, SymbolContextScope *context, diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 33330ef0926d6..0c378b069086d 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -52,6 +52,7 @@ #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/State.h" +#include "lldb/Utility/StructuredData.h" #include "lldb/Utility/Timer.h" #include "lldb/lldb-enumerations.h" #include "lldb/lldb-private-enumerations.h" @@ -61,6 +62,7 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatAdapters.h" @@ -1462,6 +1464,87 @@ static bool DumpModuleSymbolFile(Stream &strm, Module 
*module) { return false; } +static bool GetSeparateDebugInfoList(StructuredData::Array &list, + Module *module) { + if (module) { + if (SymbolFile *symbol_file = module->GetSymbolFile(/*can_create=*/true)) { + StructuredData::Dictionary d; + if (symbol_file->GetSeparateDebugInfo(d)) { + list.AddItem( + std::make_shared(std::move(d))); + return true; + } + } + } + return false; +} + +static void DumpDwoFilesTable(Stream &strm, + StructuredData::Array &dwo_listings) { + strm.PutCString("Dwo ID Err Dwo Path"); + strm.EOL(); + strm.PutCString( + "------------------ --- -----------------------------------------"); + strm.EOL(); + dwo_listings.ForEach([&strm](StructuredData::Object *dwo) { + StructuredData::Dictionary *dict = dwo->GetAsDictionary(); + if (!dict) + return false; + + uint64_t dwo_id; + if (dict->GetValueForKeyAsInteger("dwo_id", dwo_id)) + strm.Printf("0x%16.16" PRIx64 " ", dwo_id); + else + strm.Printf("0x???????????????? "); + + llvm::StringRef error; + if (dict->GetValueForKeyAsString("error", error)) + strm << "E " << error; + else { + llvm::StringRef resolved_dwo_path; + if (dict->GetValueForKeyAsString("resolved_dwo_path", + resolved_dwo_path)) { + strm << " " << resolved_dwo_path; + if (resolved_dwo_path.ends_with(".dwp")) { + llvm::StringRef dwo_name; + if (dict->GetValueForKeyAsString("dwo_name", dwo_name)) + strm << "(" << dwo_name << ")"; + } + } + } + strm.EOL(); + return true; + }); +} + +static void DumpOsoFilesTable(Stream &strm, + StructuredData::Array &oso_listings) { + strm.PutCString("Mod Time Err Oso Path"); + strm.EOL(); + strm.PutCString("------------------ --- ---------------------"); + strm.EOL(); + oso_listings.ForEach([&strm](StructuredData::Object *oso) { + StructuredData::Dictionary *dict = oso->GetAsDictionary(); + if (!dict) + return false; + + uint32_t oso_mod_time; + if (dict->GetValueForKeyAsInteger("oso_mod_time", oso_mod_time)) + strm.Printf("0x%16.16" PRIx32 " ", oso_mod_time); + + llvm::StringRef error; + if 
(dict->GetValueForKeyAsString("error", error)) + strm << "E " << error; + else { + llvm::StringRef oso_path; + if (dict->GetValueForKeyAsString("oso_path", oso_path)) + strm << " " << oso_path; + } + strm.EOL(); + return true; + }); +} + static void DumpAddress(ExecutionContextScope *exe_scope, const Address &so_addr, bool verbose, bool all_ranges, Stream &strm) { @@ -2462,6 +2545,176 @@ class CommandObjectTargetModulesDumpLineTable CommandOptions m_options; }; +#pragma mark CommandObjectTargetModulesDumpSeparateDebugInfoFiles +#define LLDB_OPTIONS_target_modules_dump_separate_debug_info +#include "CommandOptions.inc" + +// Image debug separate debug info dumping command + +class CommandObjectTargetModulesDumpSeparateDebugInfoFiles + : public CommandObjectTargetModulesModuleAutoComplete { +public: + CommandObjectTargetModulesDumpSeparateDebugInfoFiles( + CommandInterpreter &interpreter) + : CommandObjectTargetModulesModuleAutoComplete( + interpreter, "target modules dump separate-debug-info", + "List the separate debug info symbol files for one or more target " + "modules.", + nullptr, eCommandRequiresTarget) {} + + ~CommandObjectTargetModulesDumpSeparateDebugInfoFiles() override = default; + + Options *GetOptions() override { return &m_options; } + + class CommandOptions : public Options { + public: + CommandOptions() = default; + + ~CommandOptions() override = default; + + Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, + ExecutionContext *execution_context) override { + Status error; + const int short_option = m_getopt_table[option_idx].val; + + switch (short_option) { + case 'j': + m_json.SetCurrentValue(true); + m_json.SetOptionWasSet(); + break; + + default: + llvm_unreachable("Unimplemented option"); + } + return error; + } + + void OptionParsingStarting(ExecutionContext *execution_context) override { + m_json.Clear(); + } + + llvm::ArrayRef GetDefinitions() override { + return 
llvm::ArrayRef(g_target_modules_dump_separate_debug_info_options); + } + + OptionValueBoolean m_json = false; + }; + +protected: + bool DoExecute(Args &command, CommandReturnObject &result) override { + Target &target = GetSelectedTarget(); + uint32_t num_dumped = 0; + + uint32_t addr_byte_size = target.GetArchitecture().GetAddressByteSize(); + result.GetOutputStream().SetAddressByteSize(addr_byte_size); + result.GetErrorStream().SetAddressByteSize(addr_byte_size); + + StructuredData::Array separate_debug_info_lists_by_module; + if (command.GetArgumentCount() == 0) { + // Dump all sections for all modules images + const ModuleList &target_modules = target.GetImages(); + std::lock_guard guard(target_modules.GetMutex()); + const size_t num_modules = target_modules.GetSize(); + if (num_modules == 0) { + result.AppendError("the target has no associated executable images"); + return false; + } + for (ModuleSP module_sp : target_modules.ModulesNoLocking()) { + if (INTERRUPT_REQUESTED( + GetDebugger(), + "Interrupted in dumping all " + "separate debug info with {0} of {1} modules dumped", + num_dumped, num_modules)) + break; + + if (GetSeparateDebugInfoList(separate_debug_info_lists_by_module, + module_sp.get())) + num_dumped++; + } + } else { + // Dump specified images (by basename or fullpath) + const char *arg_cstr; + for (int arg_idx = 0; + (arg_cstr = command.GetArgumentAtIndex(arg_idx)) != nullptr; + ++arg_idx) { + ModuleList module_list; + const size_t num_matches = + FindModulesByName(&target, arg_cstr, module_list, true); + if (num_matches > 0) { + for (size_t i = 0; i < num_matches; ++i) { + if (INTERRUPT_REQUESTED(GetDebugger(), + "Interrupted dumping {0} " + "of {1} requested modules", + i, num_matches)) + break; + Module *module = module_list.GetModulePointerAtIndex(i); + if (GetSeparateDebugInfoList(separate_debug_info_lists_by_module, + module)) + num_dumped++; + } + } else + result.AppendWarningWithFormat( + "Unable to find an image that matches '%s'.\n", 
arg_cstr); + } + } + + if (num_dumped > 0) { + Stream &strm = result.GetOutputStream(); + if (m_options.m_json) { + separate_debug_info_lists_by_module.Dump(strm, + /*pretty_print=*/true); + } else { + // List the debug info files in human readable form. + separate_debug_info_lists_by_module.ForEach( + [&result, &strm](StructuredData::Object *obj) { + if (!obj) { + return false; + } + + // Each item in `separate_debug_info_lists_by_module` should be a + // valid structured data dictionary. + StructuredData::Dictionary *separate_debug_info_list = + obj->GetAsDictionary(); + if (!separate_debug_info_list) { + return false; + } + + llvm::StringRef type; + llvm::StringRef symfile; + StructuredData::Array *files; + assert(separate_debug_info_list->GetValueForKeyAsString("type", + type)); + assert(separate_debug_info_list->GetValueForKeyAsString("symfile", + symfile)); + assert(separate_debug_info_list->GetValueForKeyAsArray( + "separate-debug-info-files", files)); + + strm << "Symbol file: " << symfile; + strm.EOL(); + strm << "Type: \"" << type << "\""; + strm.EOL(); + if (type == "dwo") { + DumpDwoFilesTable(strm, *files); + } else if (type == "oso") { + DumpOsoFilesTable(strm, *files); + } else { + result.AppendWarningWithFormat( + "Found unsupported debug info type '%s'.\n", + type.str().c_str()); + } + return true; + }); + } + result.SetStatus(eReturnStatusSuccessFinishResult); + } else { + result.AppendError("no matching executable images found"); + } + return result.Succeeded(); + } + + CommandOptions m_options; +}; + #pragma mark CommandObjectTargetModulesDump // Dump multi-word command for target modules @@ -2475,7 +2728,8 @@ class CommandObjectTargetModulesDump : public CommandObjectMultiword { "Commands for dumping information about one or more target " "modules.", "target modules dump " - "[objfile|symtab|sections|ast|symfile|line-table|pcm-info] " + "[objfile|symtab|sections|ast|symfile|line-table|pcm-info|separate-" + "debug-info] " "[ ...]") { 
LoadSubCommand("objfile", CommandObjectSP( @@ -2499,6 +2753,10 @@ class CommandObjectTargetModulesDump : public CommandObjectMultiword { "pcm-info", CommandObjectSP( new CommandObjectTargetModulesDumpClangPCMInfo(interpreter))); + LoadSubCommand("separate-debug-info", + CommandObjectSP( + new CommandObjectTargetModulesDumpSeparateDebugInfoFiles( + interpreter))); } ~CommandObjectTargetModulesDump() override = default; diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 7af20e93a16d4..078b23e09e4fa 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -8,6 +8,11 @@ let Command = "target modules dump symtab" in { Desc<"Do not demangle symbol names before showing them.">; } +let Command = "target modules dump separate debug info" in { + def tm_json : Option<"json", "j">, Group<1>, + Desc<"Output the details in JSON format.">; +} + let Command = "help" in { def help_hide_aliases : Option<"hide-aliases", "a">, Desc<"Hide aliases in the command list.">; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index e472074545a6f..f52a095bf1675 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Format.h" #include "llvm/Support/Threading.h" #include "lldb/Core/Module.h" @@ -24,6 +25,7 @@ #include "lldb/Utility/RegularExpression.h" #include "lldb/Utility/Scalar.h" #include "lldb/Utility/StreamString.h" +#include "lldb/Utility/StructuredData.h" #include "lldb/Utility/Timer.h" #include "Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h" @@ -1752,11 +1754,10 @@ SymbolFileDWARF::GetDwoSymbolFileForCompileUnit( // it. Or it's absolute. 
found = FileSystem::Instance().Exists(dwo_file); + const char *comp_dir = + cu_die.GetAttributeValueAsString(dwarf_cu, DW_AT_comp_dir, nullptr); if (!found) { // It could be a relative path that also uses DW_AT_COMP_DIR. - const char *comp_dir = - cu_die.GetAttributeValueAsString(dwarf_cu, DW_AT_comp_dir, nullptr); - if (comp_dir) { dwo_file.SetFile(comp_dir, FileSpec::Style::native); if (!dwo_file.IsRelative()) { @@ -4226,6 +4227,70 @@ void SymbolFileDWARF::DumpClangAST(Stream &s) { clang->Dump(s.AsRawOstream()); } +bool SymbolFileDWARF::GetSeparateDebugInfo(StructuredData::Dictionary &d) { + StructuredData::Array separate_debug_info_files; + DWARFDebugInfo &info = DebugInfo(); + const size_t num_cus = info.GetNumUnits(); + for (size_t cu_idx = 0; cu_idx < num_cus; cu_idx++) { + DWARFUnit *unit = info.GetUnitAtIndex(cu_idx); + DWARFCompileUnit *dwarf_cu = llvm::dyn_cast(unit); + if (dwarf_cu == nullptr) + continue; + + // Check if this is a DWO unit by checking if it has a DWO ID. + // NOTE: it seems that `DWARFUnit::IsDWOUnit` is always false? + if (!dwarf_cu->GetDWOId().has_value()) + continue; + + StructuredData::DictionarySP dwo_data = + std::make_shared(); + const uint64_t dwo_id = dwarf_cu->GetDWOId().value(); + dwo_data->AddIntegerItem("dwo_id", dwo_id); + + if (const DWARFBaseDIE die = dwarf_cu->GetUnitDIEOnly()) { + const char *dwo_name = GetDWOName(*dwarf_cu, *die.GetDIE()); + if (dwo_name) { + dwo_data->AddStringItem("dwo_name", dwo_name); + } else { + dwo_data->AddStringItem("error", "missing dwo name"); + } + + const char *comp_dir = die.GetDIE()->GetAttributeValueAsString( + dwarf_cu, DW_AT_comp_dir, nullptr); + if (comp_dir) { + dwo_data->AddStringItem("comp_dir", comp_dir); + } + } else { + dwo_data->AddStringItem( + "error", + llvm::formatv("unable to get unit DIE for DWARFUnit at {0:x}", + dwarf_cu->GetOffset()) + .str()); + } + + // If we have a DWO symbol file, that means we were able to successfully + // load it. 
+ SymbolFile *dwo_symfile = dwarf_cu->GetDwoSymbolFile(); + if (dwo_symfile) { + dwo_data->AddStringItem( + "resolved_dwo_path", + dwo_symfile->GetObjectFile()->GetFileSpec().GetPath()); + } else { + dwo_data->AddStringItem("error", + dwarf_cu->GetDwoError().AsCString("unknown")); + } + dwo_data->AddBooleanItem("loaded", dwo_symfile != nullptr); + separate_debug_info_files.AddItem(dwo_data); + } + + d.AddStringItem("type", "dwo"); + d.AddStringItem("symfile", GetMainObjectFile()->GetFileSpec().GetPath()); + d.AddItem("separate-debug-info-files", + std::make_shared( + std::move(separate_debug_info_files))); + return true; +} + SymbolFileDWARFDebugMap *SymbolFileDWARF::GetDebugMapSymfile() { if (m_debug_map_symfile == nullptr) { lldb::ModuleSP module_sp(m_debug_map_module_wp.lock()); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 5aaf8bd270ef7..a32c0609d3fdb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -30,6 +30,7 @@ #include "lldb/Utility/ConstString.h" #include "lldb/Utility/Flags.h" #include "lldb/Utility/RangeMap.h" +#include "lldb/Utility/StructuredData.h" #include "lldb/lldb-private.h" #include "DWARFContext.h" @@ -285,6 +286,10 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { void DumpClangAST(lldb_private::Stream &s) override; + /// List separate dwo files. 
+ bool + GetSeparateDebugInfo(lldb_private::StructuredData::Dictionary &d) override; + lldb_private::DWARFContext &GetDWARFContext() { return m_context; } const std::shared_ptr &GetDwpSymbolFile(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index eadedd32e1a4a..4e194939814b6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -18,8 +18,9 @@ #include "lldb/Host/FileSystem.h" #include "lldb/Utility/RangeMap.h" #include "lldb/Utility/RegularExpression.h" -#include "lldb/Utility/Timer.h" #include "lldb/Utility/StreamString.h" +#include "lldb/Utility/StructuredData.h" +#include "lldb/Utility/Timer.h" //#define DEBUG_OSO_DMAP // DO NOT CHECKIN WITH THIS NOT COMMENTED OUT @@ -1271,6 +1272,42 @@ void SymbolFileDWARFDebugMap::DumpClangAST(Stream &s) { }); } +bool SymbolFileDWARFDebugMap::GetSeparateDebugInfo( + lldb_private::StructuredData::Dictionary &d) { + StructuredData::Array separate_debug_info_files; + const uint32_t cu_count = GetNumCompileUnits(); + for (uint32_t cu_idx = 0; cu_idx < cu_count; ++cu_idx) { + const auto &info = m_compile_unit_infos[cu_idx]; + StructuredData::DictionarySP oso_data = + std::make_shared(); + oso_data->AddStringItem("so_file", info.so_file.GetPath()); + oso_data->AddStringItem("oso_path", info.oso_path); + oso_data->AddIntegerItem("oso_mod_time", + (uint32_t)llvm::sys::toTimeT(info.oso_mod_time)); + + bool loaded_successfully = false; + if (GetModuleByOSOIndex(cu_idx)) { + // If we have a valid pointer to the module, we successfully + // loaded the oso if there are no load errors. 
+ if (!info.oso_load_error.Fail()) { + loaded_successfully = true; + } + } + if (!loaded_successfully) { + oso_data->AddStringItem("error", info.oso_load_error.AsCString()); + } + oso_data->AddBooleanItem("loaded", loaded_successfully); + separate_debug_info_files.AddItem(oso_data); + } + + d.AddStringItem("type", "oso"); + d.AddStringItem("symfile", GetMainObjectFile()->GetFileSpec().GetPath()); + d.AddItem("separate-debug-info-files", + std::make_shared( + std::move(separate_debug_info_files))); + return true; +} + lldb::CompUnitSP SymbolFileDWARFDebugMap::GetCompileUnit(SymbolFileDWARF *oso_dwarf, DWARFCompileUnit &dwarf_cu) { if (oso_dwarf) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index 881fd4c45ff05..0dc4235cf090f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -19,6 +19,7 @@ #include #include "UniqueDWARFASTType.h" +#include "lldb/Utility/StructuredData.h" class SymbolFileDWARF; class DWARFCompileUnit; @@ -148,6 +149,10 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { void DumpClangAST(lldb_private::Stream &s) override; + /// List separate oso files. 
+ bool + GetSeparateDebugInfo(lldb_private::StructuredData::Dictionary &d) override; + // PluginInterface protocol llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } diff --git a/lldb/source/Symbol/SymbolFile.cpp b/lldb/source/Symbol/SymbolFile.cpp index b271efd07bfe3..7dcee8ced0ea1 100644 --- a/lldb/source/Symbol/SymbolFile.cpp +++ b/lldb/source/Symbol/SymbolFile.cpp @@ -18,6 +18,7 @@ #include "lldb/Symbol/VariableList.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/StreamString.h" +#include "lldb/Utility/StructuredData.h" #include "lldb/lldb-private.h" #include diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/Makefile b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/Makefile new file mode 100644 index 0000000000000..3b6d788b2b013 --- /dev/null +++ b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/Makefile @@ -0,0 +1,4 @@ +CXX_SOURCES := main.cpp foo.cpp +CFLAGS_EXTRAS := -gsplit-dwarf + +include Makefile.rules diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py new file mode 100644 index 0000000000000..c58ffdefb4587 --- /dev/null +++ b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py @@ -0,0 +1,122 @@ +""" +Test 'target modules dump separate-debug-info' for dwo files. 
+""" + +import json +import os + +from lldbsuite.test import lldbtest, lldbutil +from lldbsuite.test.decorators import * + + +class TestDumpDWO(lldbtest.TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def get_dwos_from_json(self): + """Returns a dictionary of `symfile` -> {`dwo_name` -> dwo_info object}.""" + result = {} + output = json.loads(self.res.GetOutput()) + for symfile_entry in output: + dwo_dict = {} + for dwo_entry in symfile_entry["separate-debug-info-files"]: + dwo_dict[dwo_entry["dwo_name"]] = dwo_entry + result[symfile_entry["symfile"]] = dwo_dict + return result + + @skipIfRemote + @skipIfDarwin + def test_dwos_loaded_json_output(self): + self.build() + exe = self.getBuildArtifact("a.out") + main_dwo = self.getBuildArtifact("main.dwo") + foo_dwo = self.getBuildArtifact("foo.dwo") + + # Make sure dwo files exist + self.assertTrue(os.path.exists(main_dwo), f'Make sure "{main_dwo}" file exists') + self.assertTrue(os.path.exists(foo_dwo), f'Make sure "{foo_dwo}" file exists') + + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, lldbtest.VALID_TARGET) + + self.runCmd("target modules dump separate-debug-info --json") + + # Check the output + output = self.get_dwos_from_json() + self.assertTrue(output[exe]["main.dwo"]["loaded"]) + self.assertTrue(output[exe]["foo.dwo"]["loaded"]) + + @skipIfRemote + @skipIfDarwin + def test_dwos_not_loaded_json_output(self): + self.build() + exe = self.getBuildArtifact("a.out") + main_dwo = self.getBuildArtifact("main.dwo") + foo_dwo = self.getBuildArtifact("foo.dwo") + + # REMOVE the dwo files + os.unlink(main_dwo) + os.unlink(foo_dwo) + + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, lldbtest.VALID_TARGET) + + self.runCmd("target modules dump separate-debug-info --json") + + # Check the output + output = self.get_dwos_from_json() + self.assertFalse(output[exe]["main.dwo"]["loaded"]) + self.assertFalse(output[exe]["foo.dwo"]["loaded"]) + self.assertIn("error", output[exe]["main.dwo"]) + 
self.assertIn("error", output[exe]["foo.dwo"]) + + @skipIfRemote + @skipIfDarwin + def test_dwos_loaded_table_output(self): + self.build() + exe = self.getBuildArtifact("a.out") + main_dwo = self.getBuildArtifact("main.dwo") + foo_dwo = self.getBuildArtifact("foo.dwo") + + # Make sure dwo files exist + self.assertTrue(os.path.exists(main_dwo), f'Make sure "{main_dwo}" file exists') + self.assertTrue(os.path.exists(foo_dwo), f'Make sure "{foo_dwo}" file exists') + + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, lldbtest.VALID_TARGET) + + self.expect( + "target modules dump separate-debug-info", + patterns=[ + "Symbol file: .*?a\.out", + 'Type: "dwo"', + "Dwo ID\s+Err\s+Dwo Path", + "0x[a-zA-Z0-9]{16}\s+.*main\.dwo", + "0x[a-zA-Z0-9]{16}\s+.*foo\.dwo", + ], + ) + + @skipIfRemote + @skipIfDarwin + def test_dwos_not_loaded_table_output(self): + self.build() + exe = self.getBuildArtifact("a.out") + main_dwo = self.getBuildArtifact("main.dwo") + foo_dwo = self.getBuildArtifact("foo.dwo") + + # REMOVE the dwo files + os.unlink(main_dwo) + os.unlink(foo_dwo) + + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, lldbtest.VALID_TARGET) + + self.expect( + "target modules dump separate-debug-info", + patterns=[ + "Symbol file: .*?a\.out", + 'Type: "dwo"', + "Dwo ID\s+Err\s+Dwo Path", + "0x[a-zA-Z0-9]{16}\s+E\s+.*main\.dwo", + "0x[a-zA-Z0-9]{16}\s+E\s+.*foo\.dwo", + ], + ) diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/foo.cpp b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/foo.cpp new file mode 100644 index 0000000000000..28e2b6e768df4 --- /dev/null +++ b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/foo.cpp @@ -0,0 +1,3 @@ +#include "foo.h" + +int foo() { return 1; } diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/foo.h b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/foo.h new file mode 100644 index 0000000000000..4ec598ad513eb --- /dev/null +++ 
b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/foo.h @@ -0,0 +1,6 @@ +#ifndef FOO_H +#define FOO_H + +int foo(); + +#endif diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/main.cpp b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/main.cpp new file mode 100644 index 0000000000000..8087e68243279 --- /dev/null +++ b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/main.cpp @@ -0,0 +1,3 @@ +#include "foo.h" + +int main() { return foo(); } diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/oso/Makefile b/lldb/test/API/commands/target/dump-separate-debug-info/oso/Makefile new file mode 100644 index 0000000000000..7df22699c57d5 --- /dev/null +++ b/lldb/test/API/commands/target/dump-separate-debug-info/oso/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp foo.cpp + +include Makefile.rules diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py b/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py new file mode 100644 index 0000000000000..05beed0eacfb0 --- /dev/null +++ b/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py @@ -0,0 +1,120 @@ +""" +Test 'target modules dump separate-debug-info' for oso files. 
+""" + +import json +import os + +from lldbsuite.test import lldbtest, lldbutil +from lldbsuite.test.decorators import * + + +class TestDumpOso(lldbtest.TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def get_osos_from_json(self): + """Returns a dictionary of `symfile` -> {`OSO_PATH` -> oso_info object}.""" + result = {} + output = json.loads(self.res.GetOutput()) + for symfile_entry in output: + oso_dict = {} + for oso_entry in symfile_entry["separate-debug-info-files"]: + oso_dict[oso_entry["oso_path"]] = oso_entry + result[symfile_entry["symfile"]] = oso_dict + return result + + @skipIfRemote + @skipUnlessDarwin + def test_shows_oso_loaded_json_output(self): + self.build(debug_info="dwarf") + exe = self.getBuildArtifact("a.out") + main_o = self.getBuildArtifact("main.o") + foo_o = self.getBuildArtifact("foo.o") + + # Make sure o files exist + self.assertTrue(os.path.exists(main_o), f'Make sure "{main_o}" file exists') + self.assertTrue(os.path.exists(foo_o), f'Make sure "{foo_o}" file exists') + + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, lldbtest.VALID_TARGET) + + self.runCmd("target modules dump separate-debug-info --json") + + # Check the output + osos = self.get_osos_from_json() + self.assertTrue(osos[exe][main_o]["loaded"]) + self.assertTrue(osos[exe][foo_o]["loaded"]) + + @skipIfRemote + @skipUnlessDarwin + def test_shows_oso_not_loaded_json_output(self): + self.build(debug_info="dwarf") + exe = self.getBuildArtifact("a.out") + main_o = self.getBuildArtifact("main.o") + foo_o = self.getBuildArtifact("foo.o") + + # REMOVE the o files + os.unlink(main_o) + os.unlink(foo_o) + + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, lldbtest.VALID_TARGET) + + self.runCmd("target modules dump separate-debug-info --json") + + # Check the output + osos = self.get_osos_from_json() + self.assertFalse(osos[exe][main_o]["loaded"]) + self.assertFalse(osos[exe][foo_o]["loaded"]) + + @skipIfRemote + @skipUnlessDarwin + def 
test_shows_oso_loaded_table_output(self): + self.build(debug_info="dwarf") + exe = self.getBuildArtifact("a.out") + main_o = self.getBuildArtifact("main.o") + foo_o = self.getBuildArtifact("foo.o") + + # Make sure o files exist + self.assertTrue(os.path.exists(main_o), f'Make sure "{main_o}" file exists') + self.assertTrue(os.path.exists(foo_o), f'Make sure "{foo_o}" file exists') + + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, lldbtest.VALID_TARGET) + + self.expect( + "target modules dump separate-debug-info", + patterns=[ + "Symbol file: .*?a\.out", + 'Type: "oso"', + "Mod Time\s+Err\s+Oso Path", + "0x[a-zA-Z0-9]{16}\s+.*main\.o", + "0x[a-zA-Z0-9]{16}\s+.*foo\.o", + ], + ) + + @skipIfRemote + @skipUnlessDarwin + def test_shows_oso_not_loaded_table_output(self): + self.build(debug_info="dwarf") + exe = self.getBuildArtifact("a.out") + main_o = self.getBuildArtifact("main.o") + foo_o = self.getBuildArtifact("foo.o") + + # REMOVE the o files + os.unlink(main_o) + os.unlink(foo_o) + + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, lldbtest.VALID_TARGET) + + self.expect( + "target modules dump separate-debug-info", + patterns=[ + "Symbol file: .*?a\.out", + 'Type: "oso"', + "Mod Time\s+Err\s+Oso Path", + "0x[a-zA-Z0-9]{16}\s+E\s+.*main\.o", + "0x[a-zA-Z0-9]{16}\s+E\s+.*foo\.o", + ], + ) diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/oso/foo.cpp b/lldb/test/API/commands/target/dump-separate-debug-info/oso/foo.cpp new file mode 100644 index 0000000000000..28e2b6e768df4 --- /dev/null +++ b/lldb/test/API/commands/target/dump-separate-debug-info/oso/foo.cpp @@ -0,0 +1,3 @@ +#include "foo.h" + +int foo() { return 1; } diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/oso/foo.h b/lldb/test/API/commands/target/dump-separate-debug-info/oso/foo.h new file mode 100644 index 0000000000000..4ec598ad513eb --- /dev/null +++ b/lldb/test/API/commands/target/dump-separate-debug-info/oso/foo.h @@ -0,0 +1,6 @@ 
+#ifndef FOO_H +#define FOO_H + +int foo(); + +#endif diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/oso/main.cpp b/lldb/test/API/commands/target/dump-separate-debug-info/oso/main.cpp new file mode 100644 index 0000000000000..8087e68243279 --- /dev/null +++ b/lldb/test/API/commands/target/dump-separate-debug-info/oso/main.cpp @@ -0,0 +1,3 @@ +#include "foo.h" + +int main() { return foo(); } From b0c769a80b5f019f189f67d20e6b24971b435970 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Thu, 12 Oct 2023 13:09:36 -0600 Subject: [PATCH 013/720] [libc++][mdspan] Fix extents CTAD (#68737) extents CTAD was requiring default constructibility of the extent arguments due to the way we implemented a pack expansion. This requirement is not in the standard. Reported in issue #68671 https://github.com/llvm/llvm-project/issues/68671 by @hewillk. Fixes #68671 --- libcxx/include/__mdspan/extents.h | 2 +- .../std/containers/views/mdspan/extents/ctad.pass.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/libcxx/include/__mdspan/extents.h b/libcxx/include/__mdspan/extents.h index a510220d4096a..f6bcd940ee607 100644 --- a/libcxx/include/__mdspan/extents.h +++ b/libcxx/include/__mdspan/extents.h @@ -456,7 +456,7 @@ using dextents = typename __mdspan_detail::__make_dextents<_IndexType, _Rank>::t // Deduction guide for extents template -extents(_IndexTypes...) -> extents; +extents(_IndexTypes...) 
-> extents; namespace __mdspan_detail { diff --git a/libcxx/test/std/containers/views/mdspan/extents/ctad.pass.cpp b/libcxx/test/std/containers/views/mdspan/extents/ctad.pass.cpp index 2a3da30bb9366..3fc7c707f036a 100644 --- a/libcxx/test/std/containers/views/mdspan/extents/ctad.pass.cpp +++ b/libcxx/test/std/containers/views/mdspan/extents/ctad.pass.cpp @@ -21,6 +21,13 @@ #include "../ConvertibleToIntegral.h" #include "test_macros.h" +struct NoDefaultCtorIndex { + size_t value; + constexpr NoDefaultCtorIndex() = delete; + constexpr NoDefaultCtorIndex(size_t val) : value(val){}; + constexpr operator size_t() const noexcept { return value; } +}; + template constexpr void test(E e, Expected expected) { ASSERT_SAME_TYPE(E, Expected); @@ -35,6 +42,7 @@ constexpr bool test() { test(std::extents(1, 2u), std::extents(1, 2u)); test(std::extents(1, 2u, 3, 4, 5, 6, 7, 8, 9), std::extents(1, 2u, 3, 4, 5, 6, 7, 8, 9)); + test(std::extents(NoDefaultCtorIndex{1}, NoDefaultCtorIndex{2}), std::extents(1, 2)); return true; } From 457308a46a37fd56af06664ad923a06d50243a56 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Fri, 6 Oct 2023 15:17:58 -0500 Subject: [PATCH 014/720] [ValueTracking] Add more tests for constant ranges; NFC --- .../Analysis/ValueTracking/constant-ranges.ll | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 llvm/test/Analysis/ValueTracking/constant-ranges.ll diff --git a/llvm/test/Analysis/ValueTracking/constant-ranges.ll b/llvm/test/Analysis/ValueTracking/constant-ranges.ll new file mode 100644 index 0000000000000..e425c1547bc3a --- /dev/null +++ b/llvm/test/Analysis/ValueTracking/constant-ranges.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=instsimplify < %s -S | FileCheck %s + +define i1 @shl_C_X_ugt(i8 %x) { +; CHECK-LABEL: @shl_C_X_ugt( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 7, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -32 +; CHECK-NEXT: ret i1 
[[R]] +; + %shl = shl i8 7, %x + %r = icmp ugt i8 %shl, 224 + ret i1 %r +} + +define i1 @shl_C_X_ugt2(i8 %x) { +; CHECK-LABEL: @shl_C_X_ugt2( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 5, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -64 +; CHECK-NEXT: ret i1 [[R]] +; + %shl = shl i8 5, %x + %r = icmp ugt i8 %shl, 192 + ret i1 %r +} + +define i1 @shl_C_X_ugt_fail(i8 %x) { +; CHECK-LABEL: @shl_C_X_ugt_fail( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 1, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], 127 +; CHECK-NEXT: ret i1 [[R]] +; + %shl = shl i8 1, %x + %r = icmp ugt i8 %shl, 127 + ret i1 %r +} + +define i1 @shl_C_X_ugt_fail2(i8 %x) { +; CHECK-LABEL: @shl_C_X_ugt_fail2( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 3, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -66 +; CHECK-NEXT: ret i1 [[R]] +; + %shl = shl i8 3, %x + %r = icmp ugt i8 %shl, 190 + ret i1 %r +} + +define i1 @shl_C_X_ugt_fail3(i8 %x) { +; CHECK-LABEL: @shl_C_X_ugt_fail3( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 -1, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -2 +; CHECK-NEXT: ret i1 [[R]] +; + %shl = shl i8 -1, %x + %r = icmp ugt i8 %shl, 254 + ret i1 %r +} + +define i1 @shl_C_X_ugt_todo(i8 %x) { +; CHECK-LABEL: @shl_C_X_ugt_todo( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 -127, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -116 +; CHECK-NEXT: ret i1 [[R]] +; + %shl = shl i8 129, %x + %r = icmp ugt i8 %shl, 140 + ret i1 %r +} + +define i1 @shl_X_C_ugt(i8 %x) { +; CHECK-LABEL: @shl_X_C_ugt( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 6 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -64 +; CHECK-NEXT: ret i1 [[R]] +; + %shl = shl i8 %x, 6 + %r = icmp ugt i8 %shl, 192 + ret i1 %r +} + +define i1 @shl_X_C_ugt_fail(i8 %x) { +; CHECK-LABEL: @shl_X_C_ugt_fail( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 6 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -65 +; CHECK-NEXT: ret i1 [[R]] +; + %shl = shl i8 %x, 6 + %r = icmp ugt i8 %shl, 191 + ret i1 %r +} + +define i1 
@shl_X_C_ugt_fail2(i8 %x) { +; CHECK-LABEL: @shl_X_C_ugt_fail2( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 5 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -64 +; CHECK-NEXT: ret i1 [[R]] +; + %shl = shl i8 %x, 5 + %r = icmp ugt i8 %shl, 192 + ret i1 %r +} + +define i1 @and_ugt(i8 %xx) { +; CHECK-LABEL: @and_ugt( +; CHECK-NEXT: [[X:%.*]] = mul i8 [[XX:%.*]], [[XX]] +; CHECK-NEXT: [[NEGX:%.*]] = sub i8 0, [[X]] +; CHECK-NEXT: [[X_P2:%.*]] = and i8 [[NEGX]], [[X]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X_P2]], -128 +; CHECK-NEXT: ret i1 [[R]] +; + %x = mul i8 %xx, %xx ; thwart complexity-based canonicalization + %negx = sub i8 0, %x + %x_p2 = and i8 %negx, %x + %r = icmp ugt i8 %x_p2, 128 + ret i1 %r +} + +define i1 @and_ugt2(i8 %xx) { +; CHECK-LABEL: @and_ugt2( +; CHECK-NEXT: [[X:%.*]] = mul i8 [[XX:%.*]], [[XX]] +; CHECK-NEXT: [[NEGX:%.*]] = sub i8 0, [[X]] +; CHECK-NEXT: [[X_P2:%.*]] = and i8 [[X]], [[NEGX]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X_P2]], -128 +; CHECK-NEXT: ret i1 [[R]] +; + %x = mul i8 %xx, %xx ; thwart complexity-based canonicalization + %negx = sub i8 0, %x + %x_p2 = and i8 %x, %negx + %r = icmp ugt i8 %x_p2, 128 + ret i1 %r +} + +define i1 @and_ugt_fail(i8 %xx) { +; CHECK-LABEL: @and_ugt_fail( +; CHECK-NEXT: [[X:%.*]] = mul i8 [[XX:%.*]], [[XX]] +; CHECK-NEXT: [[NEGX:%.*]] = sub i8 0, [[X]] +; CHECK-NEXT: [[X_P2:%.*]] = and i8 [[X]], [[NEGX]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X_P2]], 127 +; CHECK-NEXT: ret i1 [[R]] +; + %x = mul i8 %xx, %xx ; thwart complexity-based canonicalization + %negx = sub i8 0, %x + %x_p2 = and i8 %x, %negx + %r = icmp ugt i8 %x_p2, 127 + ret i1 %r +} From 0f8b40a82ebeec65eb560d85368b1540333897f8 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Fri, 6 Oct 2023 15:18:21 -0500 Subject: [PATCH 015/720] [ValueTracking] Add better support for ConstantRange(Shl) 1) If LHS is constant: - The low bits of the LHS is set, the lower bound is non-zero - The upper bound can be capped at popcount(LHS) high bits 2) 
If RHS is constant: - The upper bound can be capped at (Width - RHS) high bits --- llvm/lib/Analysis/ValueTracking.cpp | 13 +++++++++++++ llvm/test/Analysis/ValueTracking/constant-ranges.ll | 12 +++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index e186431934d22..9b29d64c97f79 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -8553,7 +8553,20 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, Lower = *C; Upper = C->shl(ShiftAmount) + 1; } + } else { + // If lowbit is set, value can never be zero. + if ((*C)[0]) + Lower = APInt::getOneBitSet(Width, 0); + // If we are shifting a constant the largest it can be is if the longest + // sequence of consecutive ones is shifted to the highbits (breaking + // ties for which sequence is higher). At the moment we take a liberal + // upper bound on this by just popcounting the constant. + // TODO: There may be a bitwise trick for the longest/highest + // consecutive sequence of ones (naive method is O(Width) loop). 
+ Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1; } + } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { + Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1; } break; diff --git a/llvm/test/Analysis/ValueTracking/constant-ranges.ll b/llvm/test/Analysis/ValueTracking/constant-ranges.ll index e425c1547bc3a..14331c251ff52 100644 --- a/llvm/test/Analysis/ValueTracking/constant-ranges.ll +++ b/llvm/test/Analysis/ValueTracking/constant-ranges.ll @@ -3,9 +3,7 @@ define i1 @shl_C_X_ugt(i8 %x) { ; CHECK-LABEL: @shl_C_X_ugt( -; CHECK-NEXT: [[SHL:%.*]] = shl i8 7, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -32 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %shl = shl i8 7, %x %r = icmp ugt i8 %shl, 224 @@ -14,9 +12,7 @@ define i1 @shl_C_X_ugt(i8 %x) { define i1 @shl_C_X_ugt2(i8 %x) { ; CHECK-LABEL: @shl_C_X_ugt2( -; CHECK-NEXT: [[SHL:%.*]] = shl i8 5, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -64 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %shl = shl i8 5, %x %r = icmp ugt i8 %shl, 192 @@ -69,9 +65,7 @@ define i1 @shl_C_X_ugt_todo(i8 %x) { define i1 @shl_X_C_ugt(i8 %x) { ; CHECK-LABEL: @shl_X_C_ugt( -; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 6 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[SHL]], -64 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %shl = shl i8 %x, 6 %r = icmp ugt i8 %shl, 192 From 50ece4cba949787241b5fbfc94be6cfdc66e90ee Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Fri, 6 Oct 2023 15:18:29 -0500 Subject: [PATCH 016/720] [ValueTracking] Add better support for ConstantRange(And) The fairly common power of two pattern `X & -X` can be capped at the highest power of 2 (signbit set). 
--- llvm/lib/Analysis/ValueTracking.cpp | 5 +++++ llvm/test/Analysis/ValueTracking/constant-ranges.ll | 12 ++---------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 9b29d64c97f79..11b39751b542f 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -8492,6 +8492,11 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, if (match(BO.getOperand(1), m_APInt(C))) // 'and x, C' produces [0, C]. Upper = *C + 1; + // X & -X is a power of two or zero. So we can cap the value at max power of + // two. + if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) || + match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0))))) + Upper = APInt::getSignedMinValue(Width) + 1; break; case Instruction::Or: diff --git a/llvm/test/Analysis/ValueTracking/constant-ranges.ll b/llvm/test/Analysis/ValueTracking/constant-ranges.ll index 14331c251ff52..26e01efedd3df 100644 --- a/llvm/test/Analysis/ValueTracking/constant-ranges.ll +++ b/llvm/test/Analysis/ValueTracking/constant-ranges.ll @@ -96,11 +96,7 @@ define i1 @shl_X_C_ugt_fail2(i8 %x) { define i1 @and_ugt(i8 %xx) { ; CHECK-LABEL: @and_ugt( -; CHECK-NEXT: [[X:%.*]] = mul i8 [[XX:%.*]], [[XX]] -; CHECK-NEXT: [[NEGX:%.*]] = sub i8 0, [[X]] -; CHECK-NEXT: [[X_P2:%.*]] = and i8 [[NEGX]], [[X]] -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X_P2]], -128 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x = mul i8 %xx, %xx ; thwart complexity-based canonicalization %negx = sub i8 0, %x @@ -111,11 +107,7 @@ define i1 @and_ugt(i8 %xx) { define i1 @and_ugt2(i8 %xx) { ; CHECK-LABEL: @and_ugt2( -; CHECK-NEXT: [[X:%.*]] = mul i8 [[XX:%.*]], [[XX]] -; CHECK-NEXT: [[NEGX:%.*]] = sub i8 0, [[X]] -; CHECK-NEXT: [[X_P2:%.*]] = and i8 [[X]], [[NEGX]] -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X_P2]], -128 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x = mul i8 %xx, %xx ; thwart 
complexity-based canonicalization %negx = sub i8 0, %x From 1c9035d1b5a9af89fccf06fa0c45f0096b063983 Mon Sep 17 00:00:00 2001 From: Yusra Syeda <99052248+ysyeda@users.noreply.github.com> Date: Thu, 12 Oct 2023 15:42:56 -0400 Subject: [PATCH 017/720] [SystemZ][z/OS] Add UtcClock extension to chrono.h/.cpp (#67846) This PR adds handling for UtcClock to chrono.h/.cpp. --------- Co-authored-by: Yusra Syeda --- llvm/include/llvm/Support/Chrono.h | 26 +++++++++++++++ llvm/lib/Support/Chrono.cpp | 45 +++++++++++++++++++++++--- llvm/unittests/Support/CMakeLists.txt | 1 + llvm/unittests/Support/UTCTimeTest.cpp | 41 +++++++++++++++++++++++ 4 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 llvm/unittests/Support/UTCTimeTest.cpp diff --git a/llvm/include/llvm/Support/Chrono.h b/llvm/include/llvm/Support/Chrono.h index 9c2bd45d2803e..71859af7c7e4a 100644 --- a/llvm/include/llvm/Support/Chrono.h +++ b/llvm/include/llvm/Support/Chrono.h @@ -33,6 +33,19 @@ namespace sys { template using TimePoint = std::chrono::time_point; +// utc_clock and utc_time are only available since C++20. Add enough code to +// support formatting date/time in UTC. 
+class UtcClock : public std::chrono::system_clock {}; + +template <typename D = std::chrono::nanoseconds> +using UtcTime = std::chrono::time_point<UtcClock, D>; + +/// Convert a std::time_t to a UtcTime +inline UtcTime<std::chrono::seconds> toUtcTime(std::time_t T) { + using namespace std::chrono; + return UtcTime<seconds>(seconds(T)); +} + /// Convert a TimePoint to std::time_t inline std::time_t toTimeT(TimePoint<> TP) { using namespace std::chrono; @@ -40,6 +53,13 @@ inline std::time_t toTimeT(TimePoint<> TP) { time_point_cast<system_clock::duration>(TP)); } +/// Convert a UtcTime to std::time_t +inline std::time_t toTimeT(UtcTime<> TP) { + using namespace std::chrono; + return system_clock::to_time_t(time_point<system_clock, seconds>( + duration_cast<seconds>(TP.time_since_epoch()))); +} + /// Convert a std::time_t to a TimePoint inline TimePoint<std::chrono::seconds> toTimePoint(std::time_t T) { @@ -58,6 +78,7 @@ toTimePoint(std::time_t T, uint32_t nsec) { } // namespace sys raw_ostream &operator<<(raw_ostream &OS, sys::TimePoint<> TP); +raw_ostream &operator<<(raw_ostream &OS, sys::UtcTime<> TP); /// Format provider for TimePoint<> /// @@ -73,6 +94,11 @@ struct format_provider<sys::TimePoint<>> { StringRef Style); }; +template <> struct format_provider<sys::UtcTime<std::chrono::seconds>> { + static void format(const sys::UtcTime<std::chrono::seconds> &TP, + llvm::raw_ostream &OS, StringRef Style); +}; + namespace detail { template <typename Period> struct unit { static const char value[]; }; template <typename Period> const char unit<Period>::value[] = ""; diff --git a/llvm/lib/Support/Chrono.cpp b/llvm/lib/Support/Chrono.cpp index 859ece8f55008..993d200675fe5 100644 --- a/llvm/lib/Support/Chrono.cpp +++ b/llvm/lib/Support/Chrono.cpp @@ -40,6 +40,24 @@ static inline struct tm getStructTM(TimePoint<> TP) { return Storage; } +static inline struct tm getStructTMUtc(UtcTime<> TP) { + struct tm Storage; + std::time_t OurTime = toTimeT(TP); + +#if defined(LLVM_ON_UNIX) + struct tm *LT = ::gmtime_r(&OurTime, &Storage); + assert(LT); + (void)LT; +#endif +#if defined(_WIN32) + int Error = ::gmtime_s(&Storage, &OurTime); + assert(!Error); + (void)Error; +#endif + + return Storage; +} + raw_ostream &operator<<(raw_ostream &OS, TimePoint<> TP) 
{ struct tm LT = getStructTM(TP); char Buffer[sizeof("YYYY-MM-DD HH:MM:SS")]; @@ -50,12 +68,10 @@ raw_ostream &operator<<(raw_ostream &OS, TimePoint<> TP) { .count())); } -void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS, - StringRef Style) { +template <typename T> +static void format(const T &Fractional, struct tm &LT, raw_ostream &OS, + StringRef Style) { using namespace std::chrono; - TimePoint<seconds> Truncated = time_point_cast<seconds>(T); - auto Fractional = T - Truncated; - struct tm LT = getStructTM(Truncated); // Handle extensions first. strftime mangles unknown %x on some platforms. if (Style.empty()) Style = "%Y-%m-%d %H:%M:%S.%N"; std::string Format; @@ -90,4 +106,23 @@ void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS, OS << (Len ? Buffer : "BAD-DATE-FORMAT"); } +void format_provider<UtcTime<std::chrono::seconds>>::format( + const UtcTime<std::chrono::seconds> &T, raw_ostream &OS, StringRef Style) { + using namespace std::chrono; + UtcTime<std::chrono::seconds> Truncated = + UtcTime<std::chrono::seconds>(duration_cast<seconds>(T.time_since_epoch())); + auto Fractional = T - Truncated; + struct tm LT = getStructTMUtc(Truncated); + llvm::format(Fractional, LT, OS, Style); +} + +void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS, + StringRef Style) { + using namespace std::chrono; + TimePoint<seconds> Truncated = time_point_cast<seconds>(T); + auto Fractional = T - Truncated; + struct tm LT = getStructTM(Truncated); + llvm::format(Fractional, LT, OS, Style); +} + } // namespace llvm diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index dfd55b228900d..e1bf793536b68 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -91,6 +91,7 @@ add_llvm_unittest(SupportTests TypeTraitsTest.cpp TrailingObjectsTest.cpp UnicodeTest.cpp + UTCTimeTest.cpp VersionTupleTest.cpp VirtualFileSystemTest.cpp WithColorTest.cpp diff --git a/llvm/unittests/Support/UTCTimeTest.cpp b/llvm/unittests/Support/UTCTimeTest.cpp new file mode 100644 index 0000000000000..64e04d29376c3 --- /dev/null +++ 
b/llvm/unittests/Support/UTCTimeTest.cpp @@ -0,0 +1,41 @@ +//===- unittests/Support/UTCTimeTest.cpp ----------------- ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Chrono.h" +#include "gtest/gtest.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatProviders.h" +#include "llvm/Support/FormatVariadic.h" + +namespace llvm { +namespace sys { +namespace { + +TEST(UTCTime, convertutc) { + // Get the current time. + time_t currentTime; + time(&currentTime); + + // Convert with toUtcTime. + SmallString<15> customResultString; + raw_svector_ostream T(customResultString); + T << formatv("{0:%Y-%m-%d %H:%M:%S}", llvm::sys::toUtcTime(currentTime)); + + // Convert with gmtime. + char gmtimeResultString[20]; + std::tm *gmtimeResult = std::gmtime(&currentTime); + assert(gmtimeResult != NULL); + std::strftime(gmtimeResultString, 20, "%Y-%m-%d %H:%M:%S", gmtimeResult); + + // Compare the formatted strings. 
+ EXPECT_EQ(customResultString, StringRef(gmtimeResultString, 19)); + +} +} // namespace +} // namespace sys +} // namespace llvm From 220244b71ba2a0301bb13fb195d64a66418d1c70 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 12 Oct 2023 19:43:07 +0000 Subject: [PATCH 018/720] [gn build] Port 1c9035d1b5a9 --- llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn index c7e2b49f748bd..fddee579547c6 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn @@ -93,6 +93,7 @@ unittest("SupportTests") { "TypeNameTest.cpp", "TypeSizeTest.cpp", "TypeTraitsTest.cpp", + "UTCTimeTest.cpp", "UnicodeTest.cpp", "VersionTupleTest.cpp", "VirtualFileSystemTest.cpp", From f248d0b28dca451d9af74c1bfc8e681919a4d982 Mon Sep 17 00:00:00 2001 From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> Date: Thu, 12 Oct 2023 13:22:45 -0700 Subject: [PATCH 019/720] [mlir][sparse] implement sparse_tensor.reorder_coo (#68916) As a side effect of the change, it also unifies the convertOp implementation between lib/codegen path. 
--- .../mlir/Dialect/SparseTensor/IR/Enums.h | 2 + .../SparseTensor/IR/SparseTensorOps.td | 4 - .../Dialect/SparseTensor/Pipelines/Passes.h | 13 +- .../Dialect/SparseTensor/Transforms/Passes.h | 31 +- .../Dialect/SparseTensor/Transforms/Passes.td | 4 - .../ExecutionEngine/SparseTensor/Storage.h | 70 ++++ .../SparseTensor/IR/SparseTensorDialect.cpp | 19 +- .../Pipelines/SparseTensorPipelines.cpp | 6 +- .../Transforms/SparseTensorCodegen.cpp | 29 +- .../Transforms/SparseTensorConversion.cpp | 270 ++------------ .../Transforms/SparseTensorPasses.cpp | 30 +- .../Transforms/SparseTensorRewriting.cpp | 22 +- .../SparsificationAndBufferizationPass.cpp | 17 +- .../Transforms/StageSparseOperations.cpp | 11 +- .../ExecutionEngine/SparseTensorRuntime.cpp | 6 + .../SparseTensor/convert_dense2sparse.mlir | 327 ++--------------- .../SparseTensor/convert_sparse2dense.mlir | 341 +++--------------- .../SparseTensor/convert_sparse2sparse.mlir | 177 ++------- .../Dialect/SparseTensor/sparse_concat.mlir | 173 +-------- .../CPU/sparse_conversion_element.mlir | 4 +- .../CPU/sparse_conversion_sparse2sparse.mlir | 46 +-- .../SparseTensor/python/test_stress.py | 8 +- 22 files changed, 265 insertions(+), 1345 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index 2920ef79f461c..ca9555248130f 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -151,6 +151,8 @@ enum class Action : uint32_t { kToCOO = 5, kToIterator = 6, kPack = 7, + // Sort an unordered COO in place. 
+ kSortCOOInPlace = 8, }; /// This enum defines all the sparse representations supportable by diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index afbabb97eb71f..9016634fa3be8 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -200,10 +200,6 @@ def SparseTensor_ConvertOp : SparseTensor_Op<"convert", // Whether the convert can be done by a single step (either a sort or a foreach), // or it would require a tmp buffer (sort, then foreach). bool directConvertable(); - - // Whether the convert is actually a sort coo - // TODO: The method will be removed when sort_coo operation is introduced. - bool isSortCOOConvert(); }]; let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)"; diff --git a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h index c88963d399c4c..57d8ffb3566f8 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h @@ -88,6 +88,8 @@ struct SparseCompilerOptions *this, "enable-buffer-initialization", desc("Enable zero-initialization of memory buffers"), init(false)}; + // TODO: Delete the option, it should also be false after switching to + // buffer-deallocation-pass PassOptions::Option createSparseDeallocs{ *this, "create-sparse-deallocs", desc("Specify if the temporary buffers created by the sparse " @@ -100,11 +102,6 @@ struct SparseCompilerOptions *this, "vl", desc("Set the vector length (0 disables vectorization)"), init(0)}; - // These options must be kept in sync with `SparseTensorConversionBase`. 
- PassOptions::Option sparseToSparse{ - *this, "s2s-strategy", - desc("Set the strategy for sparse-to-sparse conversion"), init(0)}; - // These options must be kept in sync with the `ConvertVectorToLLVM` // (defined in include/mlir/Dialect/SparseTensor/Pipelines/Passes.h). PassOptions::Option reassociateFPReductions{ @@ -174,12 +171,6 @@ struct SparseCompilerOptions enableRuntimeLibrary); } - /// Projects out the options for `createSparseTensorConversionPass`. - SparseTensorConversionOptions sparseTensorConversionOptions() const { - return SparseTensorConversionOptions( - sparseToSparseConversionStrategy(sparseToSparse)); - } - /// Projects out the options for `createConvertVectorToLLVMPass`. ConvertVectorToLLVMPassOptions lowerVectorToLLVMOptions() const { ConvertVectorToLLVMPassOptions opts{}; diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h index c537e92a51d53..204bc1ec2def1 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h @@ -119,37 +119,11 @@ class SparseTensorTypeToPtrConverter : public TypeConverter { SparseTensorTypeToPtrConverter(); }; -/// Defines a strategy for implementing sparse-to-sparse conversion. -/// `kAuto` leaves it up to the compiler to automatically determine -/// the method used. `kViaCOO` converts the source tensor to COO and -/// then converts the COO to the target format. `kDirect` converts -/// directly via the algorithm in ; -/// however, beware that there are many formats not supported by this -/// conversion method. -enum class SparseToSparseConversionStrategy { kAuto, kViaCOO, kDirect }; - -/// Converts command-line sparse2sparse flag to the strategy enum. -SparseToSparseConversionStrategy sparseToSparseConversionStrategy(int32_t flag); - -/// SparseTensorConversion options. 
-struct SparseTensorConversionOptions { - SparseTensorConversionOptions(SparseToSparseConversionStrategy s2s) - : sparseToSparseStrategy(s2s) {} - SparseTensorConversionOptions() - : SparseTensorConversionOptions(SparseToSparseConversionStrategy::kAuto) { - } - SparseToSparseConversionStrategy sparseToSparseStrategy; -}; - /// Sets up sparse tensor conversion rules. -void populateSparseTensorConversionPatterns( - TypeConverter &typeConverter, RewritePatternSet &patterns, - const SparseTensorConversionOptions &options = - SparseTensorConversionOptions()); +void populateSparseTensorConversionPatterns(TypeConverter &typeConverter, + RewritePatternSet &patterns); std::unique_ptr createSparseTensorConversionPass(); -std::unique_ptr -createSparseTensorConversionPass(const SparseTensorConversionOptions &options); //===----------------------------------------------------------------------===// // The SparseTensorCodegen pass. @@ -235,7 +209,6 @@ std::unique_ptr createSparsificationAndBufferizationPass(); std::unique_ptr createSparsificationAndBufferizationPass( const bufferization::OneShotBufferizationOptions &bufferizationOptions, const SparsificationOptions &sparsificationOptions, - const SparseTensorConversionOptions &sparseTensorConversionOptions, bool createSparseDeallocs, bool enableRuntimeLibrary, bool enableBufferInitialization, unsigned vectorLength, bool enableVLAVectorization, bool enableSIMDIndex32); diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td index 8f116bff9b185..3081f07b7bfe1 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td @@ -201,10 +201,6 @@ def SparseTensorConversionPass : Pass<"sparse-tensor-conversion", "ModuleOp"> { "scf::SCFDialect", "sparse_tensor::SparseTensorDialect", ]; - let options = [ - Option<"sparseToSparse", "s2s-strategy", "int32_t", "0", - "Set the strategy for 
sparse-to-sparse conversion">, - ]; } def SparseTensorCodegen : Pass<"sparse-tensor-codegen", "ModuleOp"> { diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 303a41bc471d5..607be1cbf956a 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -374,6 +374,19 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Partially specialize lexicographical insertions based on template types. void lexInsert(const uint64_t *lvlCoords, V val) final { assert(lvlCoords && "Received nullptr for level-coordinates"); + // TODO: get rid of this! canonicalize all-dense "sparse" array into dense + // tensors. + bool allDense = std::all_of(getLvlTypes().begin(), getLvlTypes().end(), + [](DimLevelType lt) { return isDenseDLT(lt); }); + if (allDense) { + uint64_t lvlRank = getLvlRank(); + uint64_t valIdx = 0; + // Linearize the address + for (size_t lvl = 0; lvl < lvlRank; lvl++) + valIdx = valIdx * getLvlSize(lvl) + lvlCoords[lvl]; + values[valIdx] = val; + return; + } // First, wrap up pending insertion path. uint64_t diffLvl = 0; uint64_t full = 0; @@ -457,6 +470,63 @@ class SparseTensorStorage final : public SparseTensorStorageBase { return coo; } + /// Sort the unordered tensor in place, the method assumes that it is + /// an unordered COO tensor. + void sortInPlace() { + uint64_t nnz = values.size(); +#ifndef NDEBUG + for (uint64_t l = 0; l < getLvlRank(); l++) + assert(nnz == coordinates[l].size()); +#endif + + // In-place permutation. + auto applyPerm = [this](std::vector &perm) { + size_t length = perm.size(); + size_t lvlRank = getLvlRank(); + // Cache for the current level coordinates. + std::vector

lvlCrds(lvlRank); + for (size_t i = 0; i < length; i++) { + size_t current = i; + if (i != perm[current]) { + for (size_t l = 0; l < lvlRank; l++) + lvlCrds[l] = coordinates[l][i]; + V val = values[i]; + // Deals with a permutation cycle. + while (i != perm[current]) { + size_t next = perm[current]; + // Swaps the level coordinates and value. + for (size_t l = 0; l < lvlRank; l++) + coordinates[l][current] = coordinates[l][next]; + values[current] = values[next]; + perm[current] = current; + current = next; + } + for (size_t l = 0; l < lvlRank; l++) + coordinates[l][current] = lvlCrds[l]; + values[current] = val; + perm[current] = current; + } + } + }; + + std::vector sortedIdx(nnz, 0); + for (uint64_t i = 0; i < nnz; i++) + sortedIdx[i] = i; + + std::sort(sortedIdx.begin(), sortedIdx.end(), + [this](uint64_t lhs, uint64_t rhs) { + for (uint64_t l = 0; l < getLvlRank(); l++) { + if (coordinates[l][lhs] == coordinates[l][rhs]) + continue; + return coordinates[l][lhs] < coordinates[l][rhs]; + } + assert(false && "duplicate coordinates"); + return false; + }); + + applyPerm(sortedIdx); + } + private: /// Appends an arbitrary new position to `positions[lvl]`. This method /// checks that `pos` is representable in the `P` type; however, it diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index ef9d4fea68628..61522fb0dcd24 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -1060,20 +1060,12 @@ LogicalResult ConvertOp::verify() { } OpFoldResult ConvertOp::fold(FoldAdaptor adaptor) { - Type dstType = getType(); - // Fold trivial dense-to-dense convert and leave trivial sparse-to-sparse - // convert for codegen to remove. This is because we use trivial - // sparse-to-sparse convert to tell bufferization that the sparse codegen - // will expand the tensor buffer into sparse tensor storage. 
- if (!getSparseTensorEncoding(dstType) && dstType == getSource().getType()) + if (getType() == getSource().getType()) return getSource(); return {}; } bool ConvertOp::directConvertable() { - if (isSortCOOConvert()) - return false; - SparseTensorType srcStt = getSparseTensorType(getSource()); SparseTensorType dstStt = getSparseTensorType(getDest()); @@ -1099,15 +1091,6 @@ bool ConvertOp::directConvertable() { return false; } -bool ConvertOp::isSortCOOConvert() { - // TODO: we should instead use a different sort_coo operation to handle - // the conversion between COOs (but with different ordering). - return isUniqueCOOType(getSource().getType()) && - isUniqueCOOType(getDest().getType()) && - !getSparseTensorType(getSource()).isAllOrdered() && - getSparseTensorType(getDest()).isAllOrdered(); -} - LogicalResult ToPositionsOp::verify() { auto e = getSparseTensorEncoding(getTensor().getType()); if (failed(lvlIsInBounds(getLevel(), getTensor()))) diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp index 7569413546c0a..3ed8bba2514aa 100644 --- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp @@ -35,9 +35,9 @@ void mlir::sparse_tensor::buildSparseCompiler( pm.addPass(createSparsificationAndBufferizationPass( getBufferizationOptionsForSparsification( options.testBufferizationAnalysisOnly), - options.sparsificationOptions(), options.sparseTensorConversionOptions(), - options.createSparseDeallocs, options.enableRuntimeLibrary, - options.enableBufferInitialization, options.vectorLength, + options.sparsificationOptions(), options.createSparseDeallocs, + options.enableRuntimeLibrary, options.enableBufferInitialization, + options.vectorLength, /*enableVLAVectorization=*/options.armSVE, /*enableSIMDIndex32=*/options.force32BitVectorIndices)); if (options.testBufferizationAnalysisOnly) diff --git 
a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 78f5562b392a6..378dd9128839d 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -680,31 +680,26 @@ class SparseDimOpConverter : public OpConversionPattern { }; // TODO: use a new SortCOO operation here instead of reusing convert op. -struct SparseSortCOOConverter : public OpConversionPattern { +struct SparseReorderCOOConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(ConvertOp op, ConvertOpAdaptor adaptor, + matchAndRewrite(ReorderCOOOp op, ReorderCOOOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - // Direct conversion should have already been lowered. - if (!op.isSortCOOConvert()) - return failure(); - Location loc = op.getLoc(); MLIRContext *ctx = op.getContext(); - SparseTensorType srcStt = getSparseTensorType(op.getSource()); - SparseTensorType dstStt = getSparseTensorType(op.getDest()); + SparseTensorType srcStt = getSparseTensorType(op.getInputCoo()); + SparseTensorType dstStt = getSparseTensorType(op.getResultCoo()); - // TODO: This should be verification rules for sort_coo operation. + // Should have been verified. assert(dstStt.isAllOrdered() && !srcStt.isAllOrdered() && isUniqueCOOType(srcStt.getRankedTensorType()) && isUniqueCOOType(dstStt.getRankedTensorType())); - assert(dstStt.hasSameDimToLvl(srcStt)); // We don't need a mutable descriptor here as we perform sorting in-place. 
- auto nnz = genValMemSize(rewriter, op.getLoc(), adaptor.getSource()); - auto desc = getDescriptorFromTensorTuple(adaptor.getSource()); + auto nnz = genValMemSize(rewriter, op.getLoc(), adaptor.getInputCoo()); + auto desc = getDescriptorFromTensorTuple(adaptor.getInputCoo()); auto crd = desc.getAOSMemRef(); auto val = desc.getValMemRef(); @@ -715,12 +710,11 @@ struct SparseSortCOOConverter : public OpConversionPattern { auto id = AffineMap::getMultiDimIdentityMap(srcStt.getLvlRank(), ctx); rewriter.create(loc, nnz, crd, ValueRange{val}, id, - rewriter.getIndexAttr(0), - SparseTensorSortKind::HybridQuickSort); + rewriter.getIndexAttr(0), op.getAlgorithm()); // Since we do in-place sorting, the destinate tensor will have the same set // of memrefs as the source tensor. - rewriter.replaceOp(op, adaptor.getSource()); + rewriter.replaceOp(op, adaptor.getInputCoo()); return success(); } }; @@ -1147,9 +1141,6 @@ class SparseConvertConverter : public OpConversionPattern { LogicalResult matchAndRewrite(ConvertOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - if (op.isSortCOOConvert()) - return failure(); - SparseTensorEncodingAttr encDst = getSparseTensorEncoding(op.getType()); SparseTensorEncodingAttr encSrc = getSparseTensorEncoding(op.getSource().getType()); @@ -1603,7 +1594,7 @@ void mlir::populateSparseTensorCodegenPatterns( SparseCastConverter, SparseExtractSliceConverter, SparseTensorLoadConverter, SparseExpandConverter, SparseCompressConverter, SparseInsertConverter, - SparseSortCOOConverter, + SparseReorderCOOConverter, SparseSliceGetterOpConverter, SparseSliceGetterOpConverter name{"delSparseTensorIterator", - primaryTypeFunctionSuffix(elemTp)}; - createFuncCall(builder, loc, name, {}, iter, EmitCInterface::Off); -} - -/// Generates a call that adds one element to a coordinate scheme. 
-/// In particular, this generates code like the following: -/// val = a[i1,..,ik]; -/// if val != 0 -/// t->add(&val, [i1,..,ik], [p1,..,pk]); -static void genAddEltCall(OpBuilder &builder, Location loc, Type eltType, - Value lvlCOO, Value valPtr, Value dimCoords, - Value dimToLvl) { - SmallString<9> name{"addElt", primaryTypeFunctionSuffix(eltType)}; - SmallVector params{lvlCOO, valPtr, dimCoords, dimToLvl}; - Type pTp = getOpaquePointerType(builder); - createFuncCall(builder, loc, name, pTp, params, EmitCInterface::On); -} - -/// Generates a call to `iter->getNext()`. If there is a next element, -/// then it is copied into the out-parameters `coords` and `elemPtr`, -/// and the return value is true. If there isn't a next element, then -/// the return value is false. -/// -/// The `coords` argument uses the same coordinate-space as the `iter` -/// (which can be either dim- or lvl-coords, depending on context). -static Value genGetNextCall(OpBuilder &builder, Location loc, Value iter, - Value coords, Value elemPtr) { - Type elemTp = cast(elemPtr.getType()).getElementType(); - SmallString<10> name{"getNext", primaryTypeFunctionSuffix(elemTp)}; - SmallVector params{iter, coords, elemPtr}; - Type i1 = builder.getI1Type(); - return createFuncCall(builder, loc, name, i1, params, EmitCInterface::On) - .getResult(0); -} - -/// Loads the value stored in `elemPtr`, and stores it at the coordinates -/// `cvs` into a dense tensor created by `allocDenseTensor`. -static void insertScalarIntoDenseTensor(OpBuilder &builder, Location loc, - Value elemPtr, Value tensor, - ValueRange cvs) { - Value elemV = builder.create(loc, elemPtr); - builder.create(loc, elemV, tensor, cvs); -} - -/// Determine if the runtime library supports direct conversion to the -/// given target `dimTypes`. 
-static bool canUseDirectConversion(ArrayRef dimTypes) { - bool alreadyCompressed = false; - for (const auto dlt : dimTypes) { - if (isCompressedDLT(dlt)) { - if (alreadyCompressed) - return false; // Multiple compressed dimensions not yet supported. - alreadyCompressed = true; - } else if (isDenseDLT(dlt)) { - if (alreadyCompressed) - return false; // Dense after Compressed not yet supported. - } else if (isSingletonDLT(dlt)) { - // Direct conversion doesn't have any particular problems with - // singleton after compressed. - } else { // TODO: investigate - return false; - } - } - return true; -} - //===----------------------------------------------------------------------===// // Conversion rules. //===----------------------------------------------------------------------===// @@ -540,179 +470,27 @@ class SparseTensorEmptyConverter : public OpConversionPattern { }; /// Sparse conversion rule for the convert operator. -class SparseTensorConvertConverter : public OpConversionPattern { +class SparseTensorReorderCOOConverter + : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; - SparseTensorConvertConverter(MLIRContext *context, - SparseTensorConversionOptions o) - : OpConversionPattern(context), options(o) {} - SparseTensorConvertConverter(TypeConverter &typeConv, MLIRContext *context, - SparseTensorConversionOptions o) - : OpConversionPattern(typeConv, context), options(o) {} LogicalResult - matchAndRewrite(ConvertOp op, OpAdaptor adaptor, + matchAndRewrite(ReorderCOOOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { const Location loc = op->getLoc(); - const auto srcTp = getSparseTensorType(op.getSource()); + const auto srcTp = getSparseTensorType(op.getInputCoo()); const auto dstTp = getSparseTensorType(op); - if (!srcTp.hasEncoding() && !dstTp.hasEncoding()) - return failure(); - const Dimension dimRank = srcTp.getDimRank(); - const Type elemTp = srcTp.getElementType(); - const Value src = 
adaptor.getOperands()[0]; - if (srcTp.hasEncoding() && dstTp.hasEncoding()) { - const auto srcEnc = srcTp.getEncoding(); - const auto dstEnc = dstTp.getEncoding(); - // This is a sparse => sparse conversion, which is handled as follows: - // t = src->toCOO(); ; src to COO in dst order - // dst = newSparseTensor(t) - // Using the coordinate scheme as an intermediate does not always - // yield the fastest conversion but avoids the need for a full - // O(N^2) conversion matrix. - if (dstEnc == srcEnc) { - rewriter.replaceOp(op, adaptor.getOperands()); // hidden nop cast - return success(); - } - NewCallParams params(rewriter, loc); - SmallVector dimSizes = getDimSizes(rewriter, loc, srcTp, src); - bool useDirectConversion; - switch (options.sparseToSparseStrategy) { - case SparseToSparseConversionStrategy::kViaCOO: - useDirectConversion = false; - break; - case SparseToSparseConversionStrategy::kDirect: - useDirectConversion = true; - assert(canUseDirectConversion(dstEnc.getLvlTypes()) && - "Unsupported target for direct sparse-to-sparse conversion"); - break; - case SparseToSparseConversionStrategy::kAuto: - useDirectConversion = canUseDirectConversion(dstEnc.getLvlTypes()); - break; - } - if (useDirectConversion) { - rewriter.replaceOp( - op, params.genBuffers(srcTp.withEncoding(dstEnc), dimSizes) - .genNewCall(Action::kSparseToSparse, src)); - } else { // use via-COO conversion. - // Set up encoding with right mix of src and dst so that the two - // method calls can share most parameters, while still providing - // the correct sparsity information to either of them. - const auto mixedEnc = - dstEnc.withBitWidths(srcEnc.getPosWidth(), srcEnc.getCrdWidth()); - // TODO: This is the only place where `kToCOO` (or `kToIterator`) - // is called with a non-identity permutation. Is there any clean - // way to push the permutation over to the `kFromCOO` side instead? 
- Value coo = params.genBuffers(srcTp.withEncoding(mixedEnc), dimSizes) - .genNewCall(Action::kToCOO, src); - Value dst = params.setTemplateTypes(srcTp.withEncoding(dstEnc)) - .genNewCall(Action::kFromCOO, coo); - genDelCOOCall(rewriter, loc, elemTp, coo); - rewriter.replaceOp(op, dst); - } - return success(); - } - if (srcTp.hasEncoding() && !dstTp.hasEncoding()) { - const auto srcEnc = srcTp.getEncoding(); - // This is sparse => dense conversion, which is handled as follows: - // dst = new Tensor(0); - // iter = new SparseTensorIterator(src); - // while (elem = iter->getNext()) { - // dst[elem.coords] = elem.value; - // } - // delete iter; - // - // Fabricate a no-permutation encoding for NewCallParams - // The position/coordinate types must be those of `src`. - // The dimLevelTypes aren't actually used by Action::kToIterator. - const auto dstEnc = SparseTensorEncodingAttr::get( - op->getContext(), - SmallVector(dimRank, DimLevelType::Dense), AffineMap(), - AffineMap(), srcEnc.getPosWidth(), srcEnc.getCrdWidth()); - SmallVector dimSizes = getDimSizes(rewriter, loc, srcTp, src); - Value iter = NewCallParams(rewriter, loc) - .genBuffers(dstTp.withEncoding(dstEnc), dimSizes) - .genNewCall(Action::kToIterator, src); - const Type iTp = rewriter.getIndexType(); - Value dimCoords = genAlloca(rewriter, loc, dimRank, iTp); - Value elemPtr = genAllocaScalar(rewriter, loc, elemTp); - // TODO: Dense buffers should be allocated/deallocated via the callback - // in BufferizationOptions. 
- Value dst = allocDenseTensor(rewriter, loc, dstTp, dimSizes); - const SmallVector noArgs; - const SmallVector noTypes; - auto whileOp = rewriter.create(loc, noTypes, noArgs); - Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, noTypes); - rewriter.setInsertionPointToEnd(before); - Value cond = genGetNextCall(rewriter, loc, iter, dimCoords, elemPtr); - rewriter.create(loc, cond, before->getArguments()); - Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes); - rewriter.setInsertionPointToStart(after); - const auto dcvs = loadAll(rewriter, loc, dimRank, dimCoords); - insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, dcvs); - rewriter.create(loc); - rewriter.setInsertionPointAfter(whileOp); - genDelIteratorCall(rewriter, loc, elemTp, iter); - rewriter.replaceOpWithNewOp( - op, dstTp.getRankedTensorType(), dst); - return success(); - } - assert(!srcTp.hasEncoding() && dstTp.hasEncoding()); - // This is a dense => sparse conversion or a sparse constant in COO => - // sparse conversion, which is handled as follows: - // t = newSparseCOO() - // ...code to fill the COO tensor t... - // s = newSparseTensor(t) - // - // To fill the COO tensor from a dense tensor: - // for i1 in dim1 - // .. - // for ik in dimk - // val = a[i1,..,ik] - // if val != 0 - // t->add(val, [i1,..,ik], [p1,..,pk]) - // - // To fill the COO tensor from a sparse constant in COO format: - // for i in range(NNZ) - // val = values[i] - // [i1,..,ik] = coordinates[i] - // t->add(val, [i1,..,ik], [p1,..,pk]) - // - // Note that the dense tensor traversal code is actually implemented - // using MLIR IR to avoid having to expose too much low-level - // memref traversal details to the runtime support library. - // Also note that the code below only generates the "new" ops and - // the loop-nest per se; whereas the entire body of the innermost - // loop is generated by genAddElt(). 
- SmallVector dimSizes; - sizesFromSrc(rewriter, dimSizes, loc, src); + const Value src = adaptor.getInputCoo(); + NewCallParams params(rewriter, loc); - Value coo = - params.genBuffers(dstTp, dimSizes).genNewCall(Action::kEmptyCOO); - const Type iTp = rewriter.getIndexType(); - Value dimCoords = genAlloca(rewriter, loc, dimRank, iTp); - Value dimToLvl = params.getDimToLvl(); - Value elemPtr = genAllocaScalar(rewriter, loc, elemTp); - genDenseTensorOrSparseConstantIterLoop( - rewriter, loc, src, dimRank, - [&](OpBuilder &builder, Location loc, Value val, ValueRange dcvs) { - assert(dcvs.size() == static_cast(dimRank)); - storeAll(builder, loc, dimCoords, dcvs); - builder.create(loc, val, elemPtr); - genAddEltCall(builder, loc, elemTp, coo, elemPtr, dimCoords, - dimToLvl); - }); - // Final call to construct sparse tensor storage. - Value dst = params.genNewCall(Action::kFromCOO, coo); - genDelCOOCall(rewriter, loc, elemTp, coo); - rewriter.replaceOp(op, dst); + SmallVector dimSizes = getDimSizes(rewriter, loc, srcTp, src); + rewriter.replaceOp(op, params.genBuffers(dstTp, dimSizes) + .genNewCall(Action::kSortCOOInPlace, src)); + return success(); } - -private: - /// Options to control sparse code generation. - SparseTensorConversionOptions options; }; /// Sparse conversion rule for the dealloc operator. @@ -1013,19 +791,17 @@ mlir::SparseTensorTypeToPtrConverter::SparseTensorTypeToPtrConverter() { /// Populates the given patterns list with conversion rules required for /// the sparsification of linear algebra operations. 
-void mlir::populateSparseTensorConversionPatterns( - TypeConverter &typeConverter, RewritePatternSet &patterns, - const SparseTensorConversionOptions &options) { - patterns.add( - typeConverter, patterns.getContext()); - patterns.add(typeConverter, - patterns.getContext(), options); +void mlir::populateSparseTensorConversionPatterns(TypeConverter &typeConverter, + RewritePatternSet &patterns) { + patterns + .add( + typeConverter, patterns.getContext()); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp index e1f88ad9c0e11..eaf15ff29dd72 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp @@ -132,9 +132,6 @@ struct SparseTensorConversionPass SparseTensorConversionPass() = default; SparseTensorConversionPass(const SparseTensorConversionPass &pass) = default; - SparseTensorConversionPass(const SparseTensorConversionOptions &options) { - sparseToSparse = static_cast(options.sparseToSparseStrategy); - } void runOnOperation() override { auto *ctx = &getContext(); @@ -187,16 +184,14 @@ struct SparseTensorConversionPass target.addLegalDialect< arith::ArithDialect, bufferization::BufferizationDialect, LLVM::LLVMDialect, memref::MemRefDialect, scf::SCFDialect>(); - // Translate strategy flags to strategy options. - SparseTensorConversionOptions options( - sparseToSparseConversionStrategy(sparseToSparse)); + // Populate with rules and apply rewriting rules. 
populateFunctionOpInterfaceTypeConversionPattern(patterns, converter); populateCallOpTypeConversionPattern(patterns, converter); scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns, target); - populateSparseTensorConversionPatterns(converter, patterns, options); + populateSparseTensorConversionPatterns(converter, patterns); if (failed(applyPartialConversion(getOperation(), target, std::move(patterns)))) signalPassFailure(); @@ -364,22 +359,6 @@ struct StorageSpecifierToLLVMPass } // namespace -//===----------------------------------------------------------------------===// -// Strategy flag methods. -//===----------------------------------------------------------------------===// - -SparseToSparseConversionStrategy -mlir::sparseToSparseConversionStrategy(int32_t flag) { - switch (flag) { - default: - return SparseToSparseConversionStrategy::kAuto; - case 1: - return SparseToSparseConversionStrategy::kViaCOO; - case 2: - return SparseToSparseConversionStrategy::kDirect; - } -} - //===----------------------------------------------------------------------===// // Pass creation methods. 
//===----------------------------------------------------------------------===// @@ -416,11 +395,6 @@ std::unique_ptr mlir::createSparseTensorConversionPass() { return std::make_unique(); } -std::unique_ptr mlir::createSparseTensorConversionPass( - const SparseTensorConversionOptions &options) { - return std::make_unique(options); -} - std::unique_ptr mlir::createSparseTensorCodegenPass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 592852f87ba1e..f16d08b86a1a1 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -971,7 +971,10 @@ struct ConcatenateRewriter : public OpRewritePattern { Value tmpCoo = dst; Type dstCooTp = getCOOType(dstRTT, true); // TODO: this should be a sort_coo operation. - dst = rewriter.create(loc, dstCooTp, tmpCoo).getResult(); + dst = rewriter + .create(loc, dstCooTp, tmpCoo, + SparseTensorSortKind::HybridQuickSort) + .getResult(); dst = rewriter.create(loc, dstRTT, dst).getResult(); rewriter.create(loc, tmpCoo); } @@ -1028,11 +1031,8 @@ struct DirectConvertRewriter : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(ConvertOp op, PatternRewriter &rewriter) const override { - if (!op.directConvertable() && !op.isSortCOOConvert()) - return op.emitError("ConvertOp not in conanical form."); - - if (op.isSortCOOConvert()) - return failure(); + if (!op.directConvertable()) + return op.emitError("ConvertOp not staged."); // TODO: Maybe we want a different operation for this too. 
auto encDst = getSparseTensorEncoding(op.getType()); @@ -1338,12 +1338,8 @@ void mlir::populatePostSparsificationRewriting(RewritePatternSet &patterns, TensorReshapeRewriter>(patterns.getContext()); if (enableForeach) patterns.add(patterns.getContext()); - - if (!enableRT) { + if (enableConvert) + patterns.add(patterns.getContext()); + if (!enableRT) patterns.add(patterns.getContext()); - // TODO: Move this to a common path for both lib/codegen when libgen support - // lowering sort_coo. - if (enableConvert) - patterns.add(patterns.getContext()); - } } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp index 552a29f667693..d8a24ea3527b1 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp @@ -63,13 +63,11 @@ class SparsificationAndBufferizationPass SparsificationAndBufferizationPass( const bufferization::OneShotBufferizationOptions &bufferizationOptions, const SparsificationOptions &sparsificationOptions, - const SparseTensorConversionOptions &sparseTensorConversionOptions, bool createSparseDeallocs, bool enableRuntimeLibrary, bool enableBufferInitialization, unsigned vectorLength, bool enableVLAVectorization, bool enableSIMDIndex32) : bufferizationOptions(bufferizationOptions), sparsificationOptions(sparsificationOptions), - sparseTensorConversionOptions(sparseTensorConversionOptions), createSparseDeallocs(createSparseDeallocs), enableRuntimeLibrary(enableRuntimeLibrary), enableBufferInitialization(enableBufferInitialization), @@ -150,8 +148,7 @@ class SparsificationAndBufferizationPass vectorLength, enableVLAVectorization, enableSIMDIndex32)); } if (enableRuntimeLibrary) { - pm.addPass( - createSparseTensorConversionPass(sparseTensorConversionOptions)); + pm.addPass(createSparseTensorConversionPass()); } else { 
pm.addPass(createSparseTensorCodegenPass(createSparseDeallocs, enableBufferInitialization)); @@ -169,7 +166,6 @@ class SparsificationAndBufferizationPass private: bufferization::OneShotBufferizationOptions bufferizationOptions; SparsificationOptions sparsificationOptions; - SparseTensorConversionOptions sparseTensorConversionOptions; bool createSparseDeallocs; bool enableRuntimeLibrary; bool enableBufferInitialization; @@ -201,10 +197,9 @@ mlir::getBufferizationOptionsForSparsification(bool analysisOnly) { std::unique_ptr mlir::createSparsificationAndBufferizationPass() { SparsificationOptions sparseOptions; - SparseTensorConversionOptions convOptions; return createSparsificationAndBufferizationPass( getBufferizationOptionsForSparsification(/*analysisOnly=*/false), - sparseOptions, convOptions, + sparseOptions, /*createSparseDeallocs=*/false, /*enableRuntimeLibrary=*/false, /*enableBufferInitialization=*/false, @@ -216,14 +211,12 @@ std::unique_ptr mlir::createSparsificationAndBufferizationPass() { std::unique_ptr mlir::createSparsificationAndBufferizationPass( const bufferization::OneShotBufferizationOptions &bufferizationOptions, const SparsificationOptions &sparsificationOptions, - const SparseTensorConversionOptions &sparseTensorConversionOptions, bool createSparseDeallocs, bool enableRuntimeLibrary, bool enableBufferInitialization, unsigned vectorLength, bool enableVLAVectorization, bool enableSIMDIndex32) { return std::make_unique< mlir::sparse_tensor::SparsificationAndBufferizationPass>( - bufferizationOptions, sparsificationOptions, - sparseTensorConversionOptions, createSparseDeallocs, enableRuntimeLibrary, - enableBufferInitialization, vectorLength, enableVLAVectorization, - enableSIMDIndex32); + bufferizationOptions, sparsificationOptions, createSparseDeallocs, + enableRuntimeLibrary, enableBufferInitialization, vectorLength, + enableVLAVectorization, enableSIMDIndex32); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp 
b/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp index 4ab4b05a7a420..4c163ea6e067b 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp @@ -22,8 +22,7 @@ struct StageUnorderedConvert : public OpRewritePattern { PatternRewriter &rewriter) const override { // TODO: Implement it as an Interface, this can be reused from other // operations too (e.g., concatenate, reshape, etc). - - if (op.directConvertable() || op.isSortCOOConvert()) + if (op.directConvertable()) return failure(); Location loc = op.getLoc(); @@ -40,13 +39,15 @@ struct StageUnorderedConvert : public OpRewritePattern { Type srcCOOTp = getCOOFromTypeWithOrdering( dstStt.getRankedTensorType(), dstStt.getDimToLvl(), /*ordered=*/false); - Value srcCOO = rewriter.create(loc, srcCOOTp, op.getSource()); + Value srcCOO = op.getSource(); + if (srcCOO.getType() != srcCOOTp) + srcCOO = rewriter.create(loc, srcCOOTp, op.getSource()); // -> sort Type dstCOOTp = getCOOFromTypeWithOrdering( dstStt.getRankedTensorType(), dstStt.getDimToLvl(), /*ordered=*/true); - // TODO: this should be a sort_coo operation. - Value dstCOO = rewriter.create(loc, dstCOOTp, srcCOO); + Value dstCOO = rewriter.create( + loc, dstCOOTp, srcCOO, SparseTensorSortKind::HybridQuickSort); // -> dest. 
if (dstCOO.getType() == op.getType()) { diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index bc6d4ad2c7401..83ceecaf5a30e 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -213,6 +213,12 @@ extern "C" { dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, \ dimRank, buffers); \ } \ + case Action::kSortCOOInPlace: { \ + assert(ptr && "Received nullptr for SparseTensorStorage object"); \ + auto &tensor = *static_cast *>(ptr); \ + tensor.sortInPlace(); \ + return ptr; \ + } \ } \ MLIR_SPARSETENSOR_FATAL("unknown action: %d\n", \ static_cast(action)); \ diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir index 1a69c80f7ecad..4dba16df39f5c 100644 --- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --sparse-tensor-conversion --canonicalize --cse | FileCheck %s +// RUN: mlir-opt %s --stage-sparse-ops --post-sparsification-rewrite="enable-foreach=false" --canonicalize --cse | FileCheck %s #SparseVector = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) @@ -16,187 +16,45 @@ map = (d0, d1, d2) -> (d2 : dense, d0 : compressed, d1 : compressed) }> -// CHECK-LABEL: func.func @sparse_convert_1d( -// CHECK-SAME: %[[VAL_0:.*]]: tensor) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 4 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 8 : i8 -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_7]] : tensor -// CHECK: %[[VAL_9:.*]] = memref.alloca() : 
memref<1xi8> -// CHECK: %[[VAL_10:.*]] = memref.cast %[[VAL_9]] : memref<1xi8> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<1xi8> -// CHECK: %[[VAL_11:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_12:.*]] = memref.cast %[[VAL_11]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_8]], %[[VAL_11]]{{\[}}%[[VAL_7]]] : memref<1xindex> -// CHECK: %[[VAL_13:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_7]], %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref<1xindex> -// CHECK: %[[VAL_15:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_16:.*]] = call @newSparseTensor(%[[VAL_12]], %[[VAL_12]], %[[VAL_10]], %[[VAL_14]], %[[VAL_14]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_15]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_17:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_18:.*]] = memref.cast %[[VAL_17]] : memref<1xindex> to memref -// CHECK: %[[VAL_19:.*]] = memref.alloca() : memref -// CHECK: scf.for %[[VAL_20:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_5]] { -// CHECK: %[[VAL_21:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_20]]] : tensor -// CHECK: %[[VAL_22:.*]] = arith.cmpi ne, %[[VAL_21]], %[[VAL_4]] : i32 -// CHECK: scf.if %[[VAL_22]] { -// CHECK: memref.store %[[VAL_20]], %[[VAL_17]]{{\[}}%[[VAL_7]]] : memref<1xindex> -// CHECK: memref.store %[[VAL_21]], %[[VAL_19]][] : memref -// CHECK: %[[VAL_23:.*]] = func.call @addEltI32(%[[VAL_16]], %[[VAL_19]], %[[VAL_18]], %[[VAL_14]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr -// CHECK: } -// CHECK: } -// CHECK: %[[VAL_24:.*]] = call @newSparseTensor(%[[VAL_12]], %[[VAL_12]], %[[VAL_10]], %[[VAL_14]], %[[VAL_14]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_1]], %[[VAL_16]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: call 
@delSparseTensorCOOI32(%[[VAL_16]]) : (!llvm.ptr) -> () -// CHECK: return %[[VAL_24]] : !llvm.ptr -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_1d +// CHECK: sparse_tensor.foreach +// CHECK: scf.if +// CHECK: sparse_tensor.insert +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: sparse_tensor.load func.func @sparse_convert_1d(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor } -// CHECK-LABEL: func.func @sparse_convert_complex( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<100xcomplex>) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = complex.constant [0.000000e+00, 0.000000e+00] : complex -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 4 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 9 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 100 : index -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 8 : i8 -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<1xi8> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<1xi8> to memref -// CHECK: memref.store %[[VAL_8]], %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<1xi8> -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_7]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<1xindex> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<1xindex> -// CHECK: %[[VAL_16:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_17:.*]] = call @newSparseTensor(%[[VAL_13]], %[[VAL_13]], %[[VAL_11]], %[[VAL_15]], %[[VAL_15]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]], %[[VAL_3]], %[[VAL_16]]) : (memref, 
memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_18:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_19:.*]] = memref.cast %[[VAL_18]] : memref<1xindex> to memref -// CHECK: %[[VAL_20:.*]] = memref.alloca() : memref> -// CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_9]] { -// CHECK: %[[VAL_22:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_21]]] : tensor<100xcomplex> -// CHECK: %[[VAL_23:.*]] = complex.neq %[[VAL_22]], %[[VAL_2]] : complex -// CHECK: scf.if %[[VAL_23]] { -// CHECK: memref.store %[[VAL_21]], %[[VAL_18]]{{\[}}%[[VAL_6]]] : memref<1xindex> -// CHECK: memref.store %[[VAL_22]], %[[VAL_20]][] : memref> -// CHECK: %[[VAL_24:.*]] = func.call @addEltC64(%[[VAL_17]], %[[VAL_20]], %[[VAL_19]], %[[VAL_15]]) : (!llvm.ptr, memref>, memref, memref) -> !llvm.ptr -// CHECK: } -// CHECK: } -// CHECK: %[[VAL_25:.*]] = call @newSparseTensor(%[[VAL_13]], %[[VAL_13]], %[[VAL_11]], %[[VAL_15]], %[[VAL_15]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]], %[[VAL_1]], %[[VAL_17]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: call @delSparseTensorCOOC64(%[[VAL_17]]) : (!llvm.ptr) -> () -// CHECK: return %[[VAL_25]] : !llvm.ptr -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_complex +// CHECK: sparse_tensor.foreach +// CHECK: scf.if +// CHECK: sparse_tensor.insert +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: sparse_tensor.load func.func @sparse_convert_complex(%arg0: tensor<100xcomplex>) -> tensor<100xcomplex, #SparseVector> { %0 = sparse_tensor.convert %arg0 : tensor<100xcomplex> to tensor<100xcomplex, #SparseVector> return %0 : tensor<100xcomplex, #SparseVector> } -// CHECK-LABEL: func.func @sparse_convert_2d( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64>) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 4 : i32 
-// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 4 : i8 -// CHECK-DAG: %[[VAL_11:.*]] = arith.constant 8 : i8 -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xi8> to memref -// CHECK: memref.store %[[VAL_10]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<2xi8> -// CHECK: memref.store %[[VAL_11]], %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref<2xi8> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_7]], %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_9]], %[[VAL_14]]{{\[}}%[[VAL_8]]] : memref<2xindex> -// CHECK: %[[VAL_16:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_16]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_8]], %[[VAL_16]]{{\[}}%[[VAL_8]]] : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_19:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_15]], %[[VAL_13]], %[[VAL_17]], %[[VAL_17]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]], %[[VAL_3]], %[[VAL_18]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_20:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<2xindex> to memref -// CHECK: %[[VAL_22:.*]] = memref.alloca() : memref -// CHECK: scf.for %[[VAL_23:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_8]] { -// CHECK: scf.for %[[VAL_24:.*]] = %[[VAL_6]] to 
%[[VAL_9]] step %[[VAL_8]] { -// CHECK: %[[VAL_25:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_23]], %[[VAL_24]]] : tensor<2x4xf64> -// CHECK: %[[VAL_26:.*]] = arith.cmpf une, %[[VAL_25]], %[[VAL_2]] : f64 -// CHECK: scf.if %[[VAL_26]] { -// CHECK: memref.store %[[VAL_23]], %[[VAL_20]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_24]], %[[VAL_20]]{{\[}}%[[VAL_8]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_25]], %[[VAL_22]][] : memref -// CHECK: %[[VAL_27:.*]] = func.call @addEltF64(%[[VAL_19]], %[[VAL_22]], %[[VAL_21]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: %[[VAL_28:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_15]], %[[VAL_13]], %[[VAL_17]], %[[VAL_17]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]], %[[VAL_1]], %[[VAL_19]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: call @delSparseTensorCOOF64(%[[VAL_19]]) : (!llvm.ptr) -> () -// CHECK: return %[[VAL_28]] : !llvm.ptr -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_2d +// CHECK: sparse_tensor.foreach +// CHECK: scf.if +// CHECK: sparse_tensor.insert +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: sparse_tensor.load func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { %0 = sparse_tensor.convert %arg0 : tensor<2x4xf64> to tensor<2x4xf64, #CSR> return %0 : tensor<2x4xf64, #CSR> } -// CHECK-LABEL: func.func @sparse_constant() -> !llvm.ptr { -// CHECK-DAG: %[[VAL_0:.*]] = arith.constant dense<[1.000000e+00, 5.000000e+00]> : tensor<2xf32> -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant dense<{{\[\[}}0, 0], [1, 6]]> : tensor<2x2xi64> -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 4 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 2 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[VAL_7:.*]] = 
arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 7 : index -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 4 : i8 -// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 8 : i8 -// CHECK-DAG: %[[VAL_11:.*]] = arith.constant 2 : index -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xi8> to memref -// CHECK: memref.store %[[VAL_9]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xi8> -// CHECK: memref.store %[[VAL_10]], %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<2xi8> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_8]], %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: %[[VAL_16:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_16]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_7]], %[[VAL_16]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_19:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_15]], %[[VAL_13]], %[[VAL_17]], %[[VAL_17]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_18]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_20:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<2xindex> to memref -// CHECK: %[[VAL_22:.*]] = memref.alloca() : memref -// CHECK: scf.for %[[VAL_23:.*]] = %[[VAL_5]] to %[[VAL_11]] step %[[VAL_7]] { -// CHECK: %[[VAL_24:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_23]], %[[VAL_5]]] : tensor<2x2xi64> -// CHECK: %[[VAL_25:.*]] = arith.index_cast %[[VAL_24]] : i64 to index -// CHECK: %[[VAL_26:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_23]], 
%[[VAL_7]]] : tensor<2x2xi64> -// CHECK: %[[VAL_27:.*]] = arith.index_cast %[[VAL_26]] : i64 to index -// CHECK: %[[VAL_28:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_23]]] : tensor<2xf32> -// CHECK: memref.store %[[VAL_25]], %[[VAL_20]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_27]], %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_28]], %[[VAL_22]][] : memref -// CHECK: %[[VAL_29:.*]] = func.call @addEltF32(%[[VAL_19]], %[[VAL_22]], %[[VAL_21]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr -// CHECK: } -// CHECK: %[[VAL_30:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_15]], %[[VAL_13]], %[[VAL_17]], %[[VAL_17]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_3]], %[[VAL_19]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: call @delSparseTensorCOOF32(%[[VAL_19]]) : (!llvm.ptr) -> () -// CHECK: return %[[VAL_30]] : !llvm.ptr -// CHECK: } +// CHECK-LABEL: func.func @sparse_constant +// CHECK: sparse_tensor.foreach +// CHECK-NOT: scf.if +// CHECK: sparse_tensor.insert +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: sparse_tensor.load func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{ // Initialize a tensor. 
%0 = arith.constant sparse<[[0, 0], [1, 6]], [1.0, 5.0]> : tensor<8x7xf32> @@ -205,59 +63,12 @@ func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{ return %1 : tensor<8x7xf32, #CSR> } -// CHECK-LABEL: func.func @sparse_constant_csc() -> !llvm.ptr { -// CHECK-DAG: %[[VAL_0:.*]] = arith.constant dense<[1.000000e+00, 5.000000e+00]> : tensor<2xf32> -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant dense<{{\[\[}}0, 0], [1, 6]]> : tensor<2x2xi64> -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 4 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 2 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 7 : index -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 4 : i8 -// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 8 : i8 -// CHECK-DAG: %[[VAL_11:.*]] = arith.constant 2 : index -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xi8> to memref -// CHECK: memref.store %[[VAL_9]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xi8> -// CHECK: memref.store %[[VAL_10]], %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<2xi8> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_8]], %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: %[[VAL_16:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_7]], %[[VAL_16]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_5]], %[[VAL_16]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_19:.*]] = 
memref.cast %[[VAL_18]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_7]], %[[VAL_18]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_5]], %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: %[[VAL_20:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_8]], %[[VAL_20]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_6]], %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: %[[VAL_22:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_23:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_21]], %[[VAL_13]], %[[VAL_17]], %[[VAL_19]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_22]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_24:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_25:.*]] = memref.cast %[[VAL_24]] : memref<2xindex> to memref -// CHECK: %[[VAL_26:.*]] = memref.alloca() : memref -// CHECK: scf.for %[[VAL_27:.*]] = %[[VAL_5]] to %[[VAL_11]] step %[[VAL_7]] { -// CHECK: %[[VAL_28:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_27]], %[[VAL_5]]] : tensor<2x2xi64> -// CHECK: %[[VAL_29:.*]] = arith.index_cast %[[VAL_28]] : i64 to index -// CHECK: %[[VAL_30:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_27]], %[[VAL_7]]] : tensor<2x2xi64> -// CHECK: %[[VAL_31:.*]] = arith.index_cast %[[VAL_30]] : i64 to index -// CHECK: %[[VAL_32:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_27]]] : tensor<2xf32> -// CHECK: memref.store %[[VAL_29]], %[[VAL_24]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_31]], %[[VAL_24]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_32]], %[[VAL_26]][] : memref -// CHECK: %[[VAL_33:.*]] = func.call @addEltF32(%[[VAL_23]], %[[VAL_26]], %[[VAL_25]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr -// CHECK: } -// CHECK: %[[VAL_34:.*]] = call 
@newSparseTensor(%[[VAL_15]], %[[VAL_21]], %[[VAL_13]], %[[VAL_17]], %[[VAL_19]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_3]], %[[VAL_23]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: call @delSparseTensorCOOF32(%[[VAL_23]]) : (!llvm.ptr) -> () -// CHECK: return %[[VAL_34]] : !llvm.ptr -// CHECK: } +// CHECK-LABEL: func.func @sparse_constant_csc +// CHECK: sparse_tensor.foreach +// CHECK-NOT: scf.if +// CHECK: sparse_tensor.insert +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: sparse_tensor.load func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{ // Initialize a tensor. %0 = arith.constant sparse<[[0, 0], [1, 6]], [1.0, 5.0]> : tensor<8x7xf32> @@ -266,73 +77,15 @@ func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{ return %1 : tensor<8x7xf32, #CSC> } -// CHECK-LABEL: func.func @sparse_convert_3d( -// CHECK-SAME: %[[VAL_0:.*]]: tensor) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 4 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 8 : i8 -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 4 : i8 -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_11:.*]] = tensor.dim %[[VAL_0]], %[[VAL_10]] : tensor -// CHECK: %[[VAL_12:.*]] = tensor.dim %[[VAL_0]], %[[VAL_9]] : tensor -// CHECK: %[[VAL_13:.*]] = tensor.dim %[[VAL_0]], %[[VAL_8]] : tensor -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<3xi8> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<3xi8> to memref -// CHECK: memref.store %[[VAL_7]], %[[VAL_14]]{{\[}}%[[VAL_10]]] : memref<3xi8> -// CHECK: memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_9]]] : 
memref<3xi8> -// CHECK: memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_8]]] : memref<3xi8> -// CHECK: %[[VAL_16:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<3xindex> to memref -// CHECK: memref.store %[[VAL_11]], %[[VAL_16]]{{\[}}%[[VAL_10]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_12]], %[[VAL_16]]{{\[}}%[[VAL_9]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_13]], %[[VAL_16]]{{\[}}%[[VAL_8]]] : memref<3xindex> -// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_8]]] : memref<3xindex> -// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_10]]] : memref<3xindex> -// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_9]]] : memref<3xindex> -// CHECK: %[[VAL_21:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_22:.*]] = memref.cast %[[VAL_21]] : memref<3xindex> to memref -// CHECK: memref.store %[[VAL_9]], %[[VAL_21]]{{\[}}%[[VAL_10]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_8]], %[[VAL_21]]{{\[}}%[[VAL_9]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_10]], %[[VAL_21]]{{\[}}%[[VAL_8]]] : memref<3xindex> -// CHECK: %[[VAL_23:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_24:.*]] = memref.cast %[[VAL_23]] : memref<3xindex> to memref -// CHECK: memref.store %[[VAL_8]], %[[VAL_23]]{{\[}}%[[VAL_10]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_10]], %[[VAL_23]]{{\[}}%[[VAL_9]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_9]], %[[VAL_23]]{{\[}}%[[VAL_8]]] : memref<3xindex> -// CHECK: %[[VAL_25:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_26:.*]] = memref.cast %[[VAL_25]] : memref<3xindex> to memref -// CHECK: memref.store %[[VAL_18]], %[[VAL_25]]{{\[}}%[[VAL_10]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_19]], %[[VAL_25]]{{\[}}%[[VAL_9]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_20]], %[[VAL_25]]{{\[}}%[[VAL_8]]] : memref<3xindex> -// CHECK: %[[VAL_27:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: 
%[[VAL_28:.*]] = call @newSparseTensor(%[[VAL_17]], %[[VAL_26]], %[[VAL_15]], %[[VAL_22]], %[[VAL_24]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]], %[[VAL_3]], %[[VAL_27]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<3xindex> to memref -// CHECK: %[[VAL_31:.*]] = memref.alloca() : memref -// CHECK: scf.for %[[VAL_32:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_9]] { -// CHECK: scf.for %[[VAL_33:.*]] = %[[VAL_10]] to %[[VAL_12]] step %[[VAL_9]] { -// CHECK: scf.for %[[VAL_34:.*]] = %[[VAL_10]] to %[[VAL_13]] step %[[VAL_9]] { -// CHECK: %[[VAL_35:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_32]], %[[VAL_33]], %[[VAL_34]]] : tensor -// CHECK: %[[VAL_36:.*]] = arith.cmpf une, %[[VAL_35]], %[[VAL_2]] : f64 -// CHECK: scf.if %[[VAL_36]] { -// CHECK: memref.store %[[VAL_32]], %[[VAL_29]]{{\[}}%[[VAL_10]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_33]], %[[VAL_29]]{{\[}}%[[VAL_9]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_34]], %[[VAL_29]]{{\[}}%[[VAL_8]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_35]], %[[VAL_31]][] : memref -// CHECK: %[[VAL_37:.*]] = func.call @addEltF64(%[[VAL_28]], %[[VAL_31]], %[[VAL_30]], %[[VAL_22]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: %[[VAL_38:.*]] = call @newSparseTensor(%[[VAL_17]], %[[VAL_26]], %[[VAL_15]], %[[VAL_22]], %[[VAL_24]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]], %[[VAL_1]], %[[VAL_28]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: call @delSparseTensorCOOF64(%[[VAL_28]]) : (!llvm.ptr) -> () -// CHECK: return %[[VAL_38]] : !llvm.ptr -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_3d +// CHECK: sparse_tensor.foreach +// CHECK: scf.if +// CHECK: sparse_tensor.insert +// CHECK: sparse_tensor.load +// CHECK: 
sparse_tensor.reorder_coo +// CHECK: sparse_tensor.foreach +// CHECK: sparse_tensor.insert +// CHECK: sparse_tensor.load func.func @sparse_convert_3d(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir index ffc0f57a23110..c22f051a0d585 100644 --- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --sparse-tensor-conversion --canonicalize --cse | FileCheck %s +// RUN: mlir-opt %s --stage-sparse-ops --post-sparsification-rewrite="enable-foreach=false" --canonicalize --cse | FileCheck %s #SparseVector = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) @@ -12,326 +12,85 @@ map = (d0, d1, d2) -> (d2 : dense, d0 : compressed, d1 : compressed) }> -// CHECK-LABEL: func.func @sparse_convert_1d( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> tensor<13xi32> { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 13 : index -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 4 : i8 -// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<1xi8> -// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<1xi8> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<1xi8> -// CHECK: %[[VAL_8:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_9:.*]] = memref.cast %[[VAL_8]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_4]], %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<1xindex> -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_3]], %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref<1xindex> -// CHECK: 
%[[VAL_12:.*]] = call @newSparseTensor(%[[VAL_9]], %[[VAL_9]], %[[VAL_7]], %[[VAL_11]], %[[VAL_11]], %[[VAL_2]], %[[VAL_2]], %[[VAL_1]], %[[VAL_1]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_13:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<1xindex> to memref -// CHECK: %[[VAL_15:.*]] = memref.alloca() : memref -// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref<13xi32> -// CHECK: linalg.fill ins(%[[VAL_2]] : i32) outs(%[[VAL_16]] : memref<13xi32>) -// CHECK: scf.while : () -> () { -// CHECK: %[[VAL_17:.*]] = func.call @getNextI32(%[[VAL_12]], %[[VAL_14]], %[[VAL_15]]) : (!llvm.ptr, memref, memref) -> i1 -// CHECK: scf.condition(%[[VAL_17]]) -// CHECK: } do { -// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_3]]] : memref<1xindex> -// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_15]][] : memref -// CHECK: memref.store %[[VAL_19]], %[[VAL_16]]{{\[}}%[[VAL_18]]] : memref<13xi32> -// CHECK: scf.yield -// CHECK: } -// CHECK: call @delSparseTensorIteratorI32(%[[VAL_12]]) : (!llvm.ptr) -> () -// CHECK: %[[VAL_20:.*]] = bufferization.to_tensor %[[VAL_16]] : memref<13xi32> -// CHECK: return %[[VAL_20]] : tensor<13xi32> -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_1d +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: memref.alloc +// CHECK: linalg.fill +// CHECK: sparse_tensor.foreach +// CHECK: memref.store +// CHECK: bufferization.to_tensor func.func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13xi32> { %0 = sparse_tensor.convert %arg0 : tensor<13xi32, #SparseVector> to tensor<13xi32> return %0 : tensor<13xi32> } -// CHECK-LABEL: func.func @sparse_convert_1d_dyn( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> tensor { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 4 : i8 -// CHECK-DAG: 
%[[VAL_4:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_5:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_4]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<1xi8> -// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<1xi8> to memref -// CHECK: memref.store %[[VAL_3]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<1xi8> -// CHECK: %[[VAL_8:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_9:.*]] = memref.cast %[[VAL_8]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<1xindex> -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_4]], %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<1xindex> -// CHECK: %[[VAL_12:.*]] = call @newSparseTensor(%[[VAL_9]], %[[VAL_9]], %[[VAL_7]], %[[VAL_11]], %[[VAL_11]], %[[VAL_2]], %[[VAL_2]], %[[VAL_1]], %[[VAL_1]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_13:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<1xindex> to memref -// CHECK: %[[VAL_15:.*]] = memref.alloca() : memref -// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_5]]) : memref -// CHECK: linalg.fill ins(%[[VAL_2]] : i32) outs(%[[VAL_16]] : memref) -// CHECK: scf.while : () -> () { -// CHECK: %[[VAL_17:.*]] = func.call @getNextI32(%[[VAL_12]], %[[VAL_14]], %[[VAL_15]]) : (!llvm.ptr, memref, memref) -> i1 -// CHECK: scf.condition(%[[VAL_17]]) -// CHECK: } do { -// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_4]]] : memref<1xindex> -// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_15]][] : memref -// CHECK: memref.store %[[VAL_19]], %[[VAL_16]]{{\[}}%[[VAL_18]]] : memref -// CHECK: scf.yield -// CHECK: } -// CHECK: call @delSparseTensorIteratorI32(%[[VAL_12]]) : (!llvm.ptr) -> () -// CHECK: %[[VAL_20:.*]] = bufferization.to_tensor 
%[[VAL_16]] : memref -// CHECK: return %[[VAL_20]] : tensor -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_1d_dyn +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: memref.alloc +// CHECK: linalg.fill +// CHECK: sparse_tensor.foreach +// CHECK: memref.store +// CHECK: bufferization.to_tensor func.func @sparse_convert_1d_dyn(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor } -// CHECK-LABEL: func.func @sparse_convert_2d( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> tensor<2x4xf64> { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 4 : i8 -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<2xi8> to memref -// CHECK: memref.store %[[VAL_9]], %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<2xi8> -// CHECK: memref.store %[[VAL_9]], %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref<2xi8> -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_7]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_8]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_5]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: %[[VAL_16:.*]] = call 
@newSparseTensor(%[[VAL_13]], %[[VAL_13]], %[[VAL_11]], %[[VAL_15]], %[[VAL_15]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_17:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = memref.cast %[[VAL_17]] : memref<2xindex> to memref -// CHECK: %[[VAL_19:.*]] = memref.alloca() : memref -// CHECK: %[[VAL_20:.*]] = memref.alloc() : memref<2x4xf64> -// CHECK: linalg.fill ins(%[[VAL_1]] : f64) outs(%[[VAL_20]] : memref<2x4xf64>) -// CHECK: scf.while : () -> () { -// CHECK: %[[VAL_21:.*]] = func.call @getNextF64(%[[VAL_16]], %[[VAL_18]], %[[VAL_19]]) : (!llvm.ptr, memref, memref) -> i1 -// CHECK: scf.condition(%[[VAL_21]]) -// CHECK: } do { -// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_19]][] : memref -// CHECK: memref.store %[[VAL_24]], %[[VAL_20]]{{\[}}%[[VAL_22]], %[[VAL_23]]] : memref<2x4xf64> -// CHECK: scf.yield -// CHECK: } -// CHECK: call @delSparseTensorIteratorF64(%[[VAL_16]]) : (!llvm.ptr) -> () -// CHECK: %[[VAL_25:.*]] = bufferization.to_tensor %[[VAL_20]] : memref<2x4xf64> -// CHECK: return %[[VAL_25]] : tensor<2x4xf64> -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_2d +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: memref.alloc +// CHECK: linalg.fill +// CHECK: sparse_tensor.foreach +// CHECK: memref.store +// CHECK: bufferization.to_tensor func.func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x4xf64> { %0 = sparse_tensor.convert %arg0 : tensor<2x4xf64, #SparseMatrix> to tensor<2x4xf64> return %0 : tensor<2x4xf64> } -// CHECK-LABEL: func.func @sparse_convert_2d_dyn0( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> tensor { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: %[[VAL_2:.*]] 
= arith.constant 6 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 4 : i8 -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_9:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_8]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<2xi8> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref<2xi8> -// CHECK: memref.store %[[VAL_6]], %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref<2xi8> -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_9]], %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_7]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_8]], %[[VAL_14]]{{\[}}%[[VAL_8]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_5]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: %[[VAL_16:.*]] = call @newSparseTensor(%[[VAL_13]], %[[VAL_13]], %[[VAL_11]], %[[VAL_15]], %[[VAL_15]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_17:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = memref.cast %[[VAL_17]] : memref<2xindex> to memref -// CHECK: %[[VAL_19:.*]] = memref.alloca() : memref -// CHECK: %[[VAL_20:.*]] = memref.alloc(%[[VAL_9]]) : memref -// CHECK: linalg.fill ins(%[[VAL_1]] : f64) outs(%[[VAL_20]] : memref) -// CHECK: scf.while : () -> () { -// 
CHECK: %[[VAL_21:.*]] = func.call @getNextF64(%[[VAL_16]], %[[VAL_18]], %[[VAL_19]]) : (!llvm.ptr, memref, memref) -> i1 -// CHECK: scf.condition(%[[VAL_21]]) -// CHECK: } do { -// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_8]]] : memref<2xindex> -// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_19]][] : memref -// CHECK: memref.store %[[VAL_24]], %[[VAL_20]]{{\[}}%[[VAL_22]], %[[VAL_23]]] : memref -// CHECK: scf.yield -// CHECK: } -// CHECK: call @delSparseTensorIteratorF64(%[[VAL_16]]) : (!llvm.ptr) -> () -// CHECK: %[[VAL_25:.*]] = bufferization.to_tensor %[[VAL_20]] : memref -// CHECK: return %[[VAL_25]] : tensor -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_2d_dyn +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: memref.alloc +// CHECK: linalg.fill +// CHECK: sparse_tensor.foreach +// CHECK: memref.store +// CHECK: bufferization.to_tensor func.func @sparse_convert_2d_dyn0(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor } -// CHECK-LABEL: func.func @sparse_convert_2d_dyn1( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> tensor<2x?xf64> { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 4 : i8 -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_9:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_8]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<2xi8> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref<2xi8> -// CHECK: 
memref.store %[[VAL_6]], %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref<2xi8> -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_7]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_9]], %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref<2xindex> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_8]], %[[VAL_14]]{{\[}}%[[VAL_8]]] : memref<2xindex> -// CHECK: %[[VAL_16:.*]] = call @newSparseTensor(%[[VAL_13]], %[[VAL_13]], %[[VAL_11]], %[[VAL_15]], %[[VAL_15]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_17:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = memref.cast %[[VAL_17]] : memref<2xindex> to memref -// CHECK: %[[VAL_19:.*]] = memref.alloca() : memref -// CHECK: %[[VAL_20:.*]] = memref.alloc(%[[VAL_9]]) : memref<2x?xf64> -// CHECK: linalg.fill ins(%[[VAL_1]] : f64) outs(%[[VAL_20]] : memref<2x?xf64>) -// CHECK: scf.while : () -> () { -// CHECK: %[[VAL_21:.*]] = func.call @getNextF64(%[[VAL_16]], %[[VAL_18]], %[[VAL_19]]) : (!llvm.ptr, memref, memref) -> i1 -// CHECK: scf.condition(%[[VAL_21]]) -// CHECK: } do { -// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_8]]] : memref<2xindex> -// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_19]][] : memref -// CHECK: memref.store %[[VAL_24]], %[[VAL_20]]{{\[}}%[[VAL_22]], %[[VAL_23]]] : memref<2x?xf64> -// CHECK: scf.yield -// CHECK: } -// CHECK: call @delSparseTensorIteratorF64(%[[VAL_16]]) : (!llvm.ptr) -> () -// CHECK: %[[VAL_25:.*]] = 
bufferization.to_tensor %[[VAL_20]] : memref<2x?xf64> -// CHECK: return %[[VAL_25]] : tensor<2x?xf64> -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_2d_dyn1 +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: memref.alloc +// CHECK: linalg.fill +// CHECK: sparse_tensor.foreach +// CHECK: memref.store +// CHECK: bufferization.to_tensor func.func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tensor<2x?xf64> { %0 = sparse_tensor.convert %arg0 : tensor<2x?xf64, #SparseMatrix> to tensor<2x?xf64> return %0 : tensor<2x?xf64> } -// CHECK-LABEL: func.func @sparse_convert_2d_dyn2( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> tensor { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 4 : i8 -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_8:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_7]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_9:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<2xi8> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_10]]{{\[}}%[[VAL_7]]] : memref<2xi8> -// CHECK: memref.store %[[VAL_5]], %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<2xi8> -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_8]], %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_9]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to 
memref -// CHECK: memref.store %[[VAL_7]], %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: %[[VAL_16:.*]] = call @newSparseTensor(%[[VAL_13]], %[[VAL_13]], %[[VAL_11]], %[[VAL_15]], %[[VAL_15]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_17:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = memref.cast %[[VAL_17]] : memref<2xindex> to memref -// CHECK: %[[VAL_19:.*]] = memref.alloca() : memref -// CHECK: %[[VAL_20:.*]] = memref.alloc(%[[VAL_8]], %[[VAL_9]]) : memref -// CHECK: linalg.fill ins(%[[VAL_1]] : f64) outs(%[[VAL_20]] : memref) -// CHECK: scf.while : () -> () { -// CHECK: %[[VAL_21:.*]] = func.call @getNextF64(%[[VAL_16]], %[[VAL_18]], %[[VAL_19]]) : (!llvm.ptr, memref, memref) -> i1 -// CHECK: scf.condition(%[[VAL_21]]) -// CHECK: } do { -// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_7]]] : memref<2xindex> -// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_19]][] : memref -// CHECK: memref.store %[[VAL_24]], %[[VAL_20]]{{\[}}%[[VAL_22]], %[[VAL_23]]] : memref -// CHECK: scf.yield -// CHECK: } -// CHECK: call @delSparseTensorIteratorF64(%[[VAL_16]]) : (!llvm.ptr) -> () -// CHECK: %[[VAL_25:.*]] = bufferization.to_tensor %[[VAL_20]] : memref -// CHECK: return %[[VAL_25]] : tensor -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_2d_dyn2 +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: memref.alloc +// CHECK: linalg.fill +// CHECK: sparse_tensor.foreach +// CHECK: memref.store +// CHECK: bufferization.to_tensor func.func @sparse_convert_2d_dyn2(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor } -// CHECK-LABEL: func.func @sparse_convert_3d( -// CHECK-SAME: %[[VAL_0:.*]]: 
!llvm.ptr) -> tensor<2x3x4xf64> { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 3 : index -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 4 : i8 -// CHECK: %[[VAL_11:.*]] = memref.alloca() : memref<3xi8> -// CHECK: %[[VAL_12:.*]] = memref.cast %[[VAL_11]] : memref<3xi8> to memref -// CHECK: memref.store %[[VAL_10]], %[[VAL_11]]{{\[}}%[[VAL_6]]] : memref<3xi8> -// CHECK: memref.store %[[VAL_10]], %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref<3xi8> -// CHECK: memref.store %[[VAL_10]], %[[VAL_11]]{{\[}}%[[VAL_7]]] : memref<3xi8> -// CHECK: %[[VAL_13:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<3xindex> to memref -// CHECK: memref.store %[[VAL_7]], %[[VAL_13]]{{\[}}%[[VAL_6]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_8]], %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_9]], %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref<3xindex> -// CHECK: %[[VAL_15:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_16:.*]] = memref.cast %[[VAL_15]] : memref<3xindex> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_15]]{{\[}}%[[VAL_6]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_5]], %[[VAL_15]]{{\[}}%[[VAL_5]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_7]], %[[VAL_15]]{{\[}}%[[VAL_7]]] : memref<3xindex> -// CHECK: %[[VAL_17:.*]] = call @newSparseTensor(%[[VAL_14]], %[[VAL_14]], %[[VAL_12]], %[[VAL_16]], %[[VAL_16]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, 
!llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_18:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_19:.*]] = memref.cast %[[VAL_18]] : memref<3xindex> to memref -// CHECK: %[[VAL_20:.*]] = memref.alloca() : memref -// CHECK: %[[VAL_21:.*]] = memref.alloc() : memref<2x3x4xf64> -// CHECK: linalg.fill ins(%[[VAL_1]] : f64) outs(%[[VAL_21]] : memref<2x3x4xf64>) -// CHECK: scf.while : () -> () { -// CHECK: %[[VAL_22:.*]] = func.call @getNextF64(%[[VAL_17]], %[[VAL_19]], %[[VAL_20]]) : (!llvm.ptr, memref, memref) -> i1 -// CHECK: scf.condition(%[[VAL_22]]) -// CHECK: } do { -// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_6]]] : memref<3xindex> -// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_5]]] : memref<3xindex> -// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<3xindex> -// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_20]][] : memref -// CHECK: memref.store %[[VAL_26]], %[[VAL_21]]{{\[}}%[[VAL_23]], %[[VAL_24]], %[[VAL_25]]] : memref<2x3x4xf64> -// CHECK: scf.yield -// CHECK: } -// CHECK: call @delSparseTensorIteratorF64(%[[VAL_17]]) : (!llvm.ptr) -> () -// CHECK: %[[VAL_27:.*]] = bufferization.to_tensor %[[VAL_21]] : memref<2x3x4xf64> -// CHECK: return %[[VAL_27]] : tensor<2x3x4xf64> -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_3d +// CHECK-NOT: sparse_tensor.reorder_coo +// CHECK: memref.alloc +// CHECK: linalg.fill +// CHECK: sparse_tensor.foreach +// CHECK: memref.store +// CHECK: bufferization.to_tensor func.func @sparse_convert_3d(%arg0: tensor<2x3x4xf64, #SparseTensor>) -> tensor<2x3x4xf64> { %0 = sparse_tensor.convert %arg0 : tensor<2x3x4xf64, #SparseTensor> to tensor<2x3x4xf64> return %0 : tensor<2x3x4xf64> diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir index e8e69dc861015..658e8aa40022e 100644 --- a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir +++ 
b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --sparse-tensor-conversion --canonicalize --cse | FileCheck %s +// RUN: mlir-opt %s --stage-sparse-ops --post-sparsification-rewrite="enable-foreach=false" --canonicalize --cse | FileCheck %s #SparseVector64 = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), @@ -33,185 +33,56 @@ map = (d0 : #sparse_tensor, d1 : #sparse_tensor) -> (d0 : compressed(nonunique), d1 : singleton) }> -// CHECK-LABEL: func.func @sparse_nop_convert( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> !llvm.ptr { -// CHECK: return %[[VAL_0]] : !llvm.ptr -// CHECK: } +// CHECK-LABEL: func.func @sparse_nop_convert +// CHECK-NEXT: return func.func @sparse_nop_convert(%arg0: tensor<64xf32, #SparseVector>) -> tensor<64xf32, #SparseVector> { %0 = sparse_tensor.convert %arg0 : tensor<64xf32, #SparseVector> to tensor<64xf32, #SparseVector> return %0 : tensor<64xf32, #SparseVector> } -// CHECK-LABEL: func.func @sparse_hidden_nop_cast( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> !llvm.ptr { -// CHECK: return %[[VAL_0]] : !llvm.ptr -// CHECK: } +// CHECK-LABEL: func.func @sparse_hidden_nop_cast +// TODO: The following convert should be a cast instead. 
+// CHECK: sparse_tensor.convert +// CHECK: return func.func @sparse_hidden_nop_cast(%arg0: tensor<32xf32, #SparseVector>) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor<32xf32, #SparseVector> to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_convert_1d_ss( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 3 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : i8 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_5:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_4]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<1xi8> -// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<1xi8> to memref -// CHECK: memref.store %[[VAL_3]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<1xi8> -// CHECK: %[[VAL_8:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_9:.*]] = memref.cast %[[VAL_8]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<1xindex> -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_4]], %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<1xindex> -// CHECK: %[[VAL_12:.*]] = call @newSparseTensor(%[[VAL_9]], %[[VAL_9]], %[[VAL_7]], %[[VAL_11]], %[[VAL_11]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_1]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: return %[[VAL_12]] : !llvm.ptr -// CHECK: } +// TODO: libgen path need to support efficient format conversion (e.g., 32 bit pos -> 64 bit pos). +// Maybe we should use a different operator as well to be clear. 
func.func @sparse_convert_1d_ss(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_convert( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 3 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : i8 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_5:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_4]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<1xi8> -// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<1xi8> to memref -// CHECK: memref.store %[[VAL_3]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<1xi8> -// CHECK: %[[VAL_8:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_9:.*]] = memref.cast %[[VAL_8]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<1xindex> -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_4]], %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<1xindex> -// CHECK: %[[VAL_12:.*]] = call @newSparseTensor(%[[VAL_9]], %[[VAL_9]], %[[VAL_7]], %[[VAL_11]], %[[VAL_11]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_1]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: return %[[VAL_12]] : !llvm.ptr -// CHECK: } +// TODO: libgen path need to support efficient format conversion (e.g., 32 bit pos -> 64 bit pos). +// Maybe we should use a different operator as well to be clear. 
func.func @sparse_convert(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor } -#SparseSingleton64 = #sparse_tensor.encoding<{ - map = (d0) -> (d0 : singleton), - posWidth = 64, - crdWidth = 64 -}> - -#SparseSingleton32 = #sparse_tensor.encoding<{ - map = (d0) -> (d0 : singleton), - posWidth = 32, - crdWidth = 32 -}> - -// -// CHECK-LABEL: func.func @sparse_convert_singleton( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 3 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : i8 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_5:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_4]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<1xi8> -// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<1xi8> to memref -// CHECK: memref.store %[[VAL_3]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<1xi8> -// CHECK: %[[VAL_8:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_9:.*]] = memref.cast %[[VAL_8]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<1xindex> -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<1xindex> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<1xindex> to memref -// CHECK: memref.store %[[VAL_4]], %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<1xindex> -// CHECK: %[[VAL_12:.*]] = call @newSparseTensor(%[[VAL_9]], %[[VAL_9]], %[[VAL_7]], %[[VAL_11]], %[[VAL_11]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_1]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: return %[[VAL_12]] : !llvm.ptr -// CHECK: } -func.func @sparse_convert_singleton(%arg0: tensor) -> tensor { - %0 = sparse_tensor.convert %arg0 : tensor to tensor - return %0 : tensor -} - -// CHECK-LABEL: func.func @sparse_convert_permuted( -// CHECK-SAME: 
%[[VAL_0:.*]]: !llvm.ptr) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 5 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 8 : i8 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_8:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_7]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_9:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_10:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> index -// CHECK: %[[VAL_11:.*]] = memref.alloca() : memref<3xi8> -// CHECK: %[[VAL_12:.*]] = memref.cast %[[VAL_11]] : memref<3xi8> to memref -// CHECK: memref.store %[[VAL_4]], %[[VAL_11]]{{\[}}%[[VAL_7]]] : memref<3xi8> -// CHECK: memref.store %[[VAL_4]], %[[VAL_11]]{{\[}}%[[VAL_6]]] : memref<3xi8> -// CHECK: memref.store %[[VAL_4]], %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref<3xi8> -// CHECK: %[[VAL_13:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<3xindex> to memref -// CHECK: memref.store %[[VAL_8]], %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_9]], %[[VAL_13]]{{\[}}%[[VAL_6]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_10]], %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref<3xindex> -// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref<3xindex> -// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref<3xindex> -// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_6]]] : memref<3xindex> -// CHECK: %[[VAL_18:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_19:.*]] = memref.cast %[[VAL_18]] : memref<3xindex> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<3xindex> -// CHECK: 
memref.store %[[VAL_5]], %[[VAL_18]]{{\[}}%[[VAL_6]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_7]], %[[VAL_18]]{{\[}}%[[VAL_5]]] : memref<3xindex> -// CHECK: %[[VAL_20:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<3xindex> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_7]], %[[VAL_20]]{{\[}}%[[VAL_6]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_6]], %[[VAL_20]]{{\[}}%[[VAL_5]]] : memref<3xindex> -// CHECK: %[[VAL_22:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[VAL_23:.*]] = memref.cast %[[VAL_22]] : memref<3xindex> to memref -// CHECK: memref.store %[[VAL_15]], %[[VAL_22]]{{\[}}%[[VAL_7]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_16]], %[[VAL_22]]{{\[}}%[[VAL_6]]] : memref<3xindex> -// CHECK: memref.store %[[VAL_17]], %[[VAL_22]]{{\[}}%[[VAL_5]]] : memref<3xindex> -// CHECK: %[[VAL_24:.*]] = call @newSparseTensor(%[[VAL_14]], %[[VAL_23]], %[[VAL_12]], %[[VAL_19]], %[[VAL_21]], %[[VAL_3]], %[[VAL_3]], %[[VAL_2]], %[[VAL_1]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: %[[VAL_25:.*]] = call @newSparseTensor(%[[VAL_14]], %[[VAL_23]], %[[VAL_12]], %[[VAL_19]], %[[VAL_21]], %[[VAL_3]], %[[VAL_3]], %[[VAL_2]], %[[VAL_2]], %[[VAL_24]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: call @delSparseTensorCOOF32(%[[VAL_24]]) : (!llvm.ptr) -> () -// CHECK: return %[[VAL_25]] : !llvm.ptr -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_permuted +// CHECK: sparse_tensor.foreach +// CHECK: sparse_tensor.insert +// CHECK: sparse_tensor.load +// CHECK: sparse_tensor.reorder_coo +// CHECK: sparse_tensor.foreach +// CHECK: sparse_tensor.insert +// CHECK: sparse_tensor.load func.func @sparse_convert_permuted(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : 
tensor } -// CHECK-LABEL: func.func @sparse_convert_slice( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 3 : i32 -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 13 : index -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 9 : i8 -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 16 : i8 -// CHECK: %[[VAL_10:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<2xi8> to memref -// CHECK: memref.store %[[VAL_8]], %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref<2xi8> -// CHECK: memref.store %[[VAL_9]], %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<2xi8> -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_6]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_7]], %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref<2xindex> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_4]], %[[VAL_14]]{{\[}}%[[VAL_4]]] : memref<2xindex> -// CHECK: %[[VAL_16:.*]] = call @newSparseTensor(%[[VAL_13]], %[[VAL_13]], %[[VAL_11]], %[[VAL_15]], %[[VAL_15]], %[[VAL_3]], %[[VAL_3]], %[[VAL_2]], %[[VAL_1]], %[[VAL_0]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr -// CHECK: return %[[VAL_16]] : !llvm.ptr -// CHECK: } +// CHECK-LABEL: func.func @sparse_convert_slice +// CHECK: sparse_tensor.foreach +// CHECK: sparse_tensor.insert +// CHECK: sparse_tensor.load +// CHECK-NOT: 
sparse_tensor.reorder_coo func.func @sparse_convert_slice(%arg0: tensor<2x13xi32, #COOSlice>) -> (tensor<2x13xi32, #SortedCOO2D>) { %0 = sparse_tensor.convert %arg0 : tensor<2x13xi32, #COOSlice> to tensor<2x13xi32, #SortedCOO2D> return %0 : tensor<2x13xi32, #SortedCOO2D> diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir index 2fb4529e5695e..bdfab54dc6dae 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -5,8 +5,7 @@ #DCSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed, d1 : compressed)}> -#DENSE = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense, d1 : dense)}> -#DENSE_P = #sparse_tensor.encoding<{map = (d0, d1) -> (d1 : dense, d0 : dense)}> + // CHECK-LABEL: @concat_sparse_sparse( // CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor // CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor @@ -258,173 +257,3 @@ func.func @concat_sparse_sparse_dense(%arg0: tensor<2x4xf64, #DCSR>, tensor<4x4xf64, #DCSR> to tensor return %0 : tensor } - -// CHECK-LABEL: @concat_sparse_sparse_annotated_dense( -// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor -// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index -// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[TMP_c9:.*]] = arith.constant 9 : index -// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index -// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor(%[[TMP_c9]], %[[TMP_c4]]) : tensor> to memref -// CHECK: %[[DIM_0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: memref.store %[[TMP_c9]], %[[DIM_0]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c4]], 
%[[DIM_0]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[VAL_1:.*]] = memref.reshape %[[VAL_0]](%[[DIM_0]]) : (memref, memref<2xindex>) -> memref -// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: memref.store %[[TMP_28]], %[[VAL_1]][%[[TMP_23]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates 
%[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: memref.store %[[TMP_28]], %[[VAL_1]][%[[TMP_29]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref -// 
CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref -// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: memref.store %[[TMP_28]], %[[VAL_1]][%[[TMP_29]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[R:.*]] = sparse_tensor.convert %[[TMP_0]] -// CHECK: return %[[R]] : tensor> -func.func @concat_sparse_sparse_annotated_dense(%arg0: tensor<2x4xf64, #DCSR>, - %arg1: tensor<3x4xf64, #DCSR>, - %arg2: tensor<4x4xf64, #DCSR>) - -> tensor { - %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} - : tensor<2x4xf64, #DCSR>, - tensor<3x4xf64, #DCSR>, - tensor<4x4xf64, #DCSR> to tensor - return %0 : tensor -} - -// CHECK-LABEL: @concat_sparse_sparse_annotated_dense_permute( -// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor -// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index -// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[TMP_c9:.*]] = arith.constant 9 : index -// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index -// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor(%[[TMP_c9]], %[[TMP_c4]]) : tensor -// CHECK: memref.store %[[TMP_c4]], %[[DIM_0]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c9]], %[[DIM_0]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[VAL_1:.*]] = memref.reshape %[[VAL_0]](%[[DIM_0]]) : 
(memref, memref<2xindex>) -> memref -// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: memref.store %[[TMP_28]], %[[VAL_1]][%[[TMP_27]], %[[TMP_23]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_12:.*]] = 
sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: memref.store %[[TMP_28]], %[[VAL_1]][%[[TMP_27]], %[[TMP_29]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref -// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref -// CHECK: 
%[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: memref.store %[[TMP_28]], %[[VAL_1]][%[[TMP_27]], %[[TMP_29]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[R:.*]] = sparse_tensor.convert %[[TMP_0]] -// CHECK: return %[[R]] : tensor> -func.func @concat_sparse_sparse_annotated_dense_permute(%arg0: tensor<2x4xf64, #DCSR>, - %arg1: tensor<3x4xf64, #DCSR>, - %arg2: tensor<4x4xf64, #DCSR>) - -> tensor { - %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} - : tensor<2x4xf64, #DCSR>, - tensor<3x4xf64, #DCSR>, - tensor<4x4xf64, #DCSR> to tensor - return %0 : tensor -} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_element.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_element.mlir index 16eaca7663aaf..a28f9057ae974 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_element.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_element.mlir @@ -17,12 +17,12 @@ // DEFINE: %{env} = //-------------------------------------------------------------------------------------------------- -// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false s2s-strategy=2 +// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false // RUN: %{compile} | %{run} | FileCheck %s // // Do the same run, but now with vectorization. 
-// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false s2s-strategy=2 vl=2 reassociate-fp-reductions=true enable-index-optimizations=true +// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true // RUN: %{compile} | %{run} | FileCheck %s // // Do the same run, but now with VLA vectorization. diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir index 1c74b6827d980..c151a8c902f31 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir @@ -17,15 +17,15 @@ // DEFINE: %{env} = //-------------------------------------------------------------------------------------------------- -// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=true s2s-strategy=2 +// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=true // RUN: %{compile} | %{run} | FileCheck %s // // Do the same run, but now with direct IR generation. -// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false s2s-strategy=2 +// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false // RUN: %{compile} | %{run} | FileCheck %s // // Do the same run, but now with direct IR generation and vectorization. -// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false s2s-strategy=2 vl=2 reassociate-fp-reductions=true enable-index-optimizations=true +// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true // RUN: %{compile} | %{run} | FileCheck %s // // Do the same run, but now with direct IR generation and VLA vectorization. 
@@ -49,13 +49,7 @@ }> #SingletonTensor1 = #sparse_tensor.encoding<{ - map = (d0, d1, d2) -> (d0 : dense, d1 : compressed, d2 : singleton) - -}> - -// This also checks the compressed->dense conversion (when there are zeros). -#SingletonTensor2 = #sparse_tensor.encoding<{ - map = (d0, d1, d2) -> (d0 : dense, d1 : dense, d2 : singleton) + map = (d0, d1, d2) -> (d0 : dense, d1 : compressed(nonunique), d2 : singleton) }> @@ -97,44 +91,34 @@ module { // Convert dense tensor directly to various sparse tensors. // %s1 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor1> - %s2 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor2> %s3 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor3> // // Convert sparse tensor directly to another sparse format. // %t13 = sparse_tensor.convert %s1 : tensor<2x3x4xf64, #Tensor1> to tensor<2x3x4xf64, #Tensor3> - %t21 = sparse_tensor.convert %s2 : tensor<2x3x4xf64, #Tensor2> to tensor<2x3x4xf64, #Tensor1> - %t23 = sparse_tensor.convert %s2 : tensor<2x3x4xf64, #Tensor2> to tensor<2x3x4xf64, #Tensor3> %t31 = sparse_tensor.convert %s3 : tensor<2x3x4xf64, #Tensor3> to tensor<2x3x4xf64, #Tensor1> // // Convert sparse tensor back to dense. // %d13 = sparse_tensor.convert %t13 : tensor<2x3x4xf64, #Tensor3> to tensor<2x3x4xf64> - %d21 = sparse_tensor.convert %t21 : tensor<2x3x4xf64, #Tensor1> to tensor<2x3x4xf64> - %d23 = sparse_tensor.convert %t23 : tensor<2x3x4xf64, #Tensor3> to tensor<2x3x4xf64> %d31 = sparse_tensor.convert %t31 : tensor<2x3x4xf64, #Tensor1> to tensor<2x3x4xf64> // // Check round-trip equality. And release dense tensors. 
// - // CHECK-COUNT-5: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) ) + // CHECK-COUNT-3: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) ) call @dump(%src) : (tensor<2x3x4xf64>) -> () call @dump(%d13) : (tensor<2x3x4xf64>) -> () - call @dump(%d21) : (tensor<2x3x4xf64>) -> () - call @dump(%d23) : (tensor<2x3x4xf64>) -> () call @dump(%d31) : (tensor<2x3x4xf64>) -> () // // Release sparse tensors. // bufferization.dealloc_tensor %t13 : tensor<2x3x4xf64, #Tensor3> - bufferization.dealloc_tensor %t21 : tensor<2x3x4xf64, #Tensor1> - bufferization.dealloc_tensor %t23 : tensor<2x3x4xf64, #Tensor3> bufferization.dealloc_tensor %t31 : tensor<2x3x4xf64, #Tensor1> bufferization.dealloc_tensor %s1 : tensor<2x3x4xf64, #Tensor1> - bufferization.dealloc_tensor %s2 : tensor<2x3x4xf64, #Tensor2> bufferization.dealloc_tensor %s3 : tensor<2x3x4xf64, #Tensor3> return @@ -160,52 +144,34 @@ module { // Convert dense tensor directly to various sparse tensors. // %s1 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #SingletonTensor1> - %s2 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #SingletonTensor2> %s3 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #SingletonTensor3> // // Convert sparse tensor directly to another sparse format. 
// - %t12 = sparse_tensor.convert %s1 : tensor<2x3x4xf64, #SingletonTensor1> to tensor<2x3x4xf64, #SingletonTensor2> %t13 = sparse_tensor.convert %s1 : tensor<2x3x4xf64, #SingletonTensor1> to tensor<2x3x4xf64, #SingletonTensor3> - %t21 = sparse_tensor.convert %s2 : tensor<2x3x4xf64, #SingletonTensor2> to tensor<2x3x4xf64, #SingletonTensor1> - %t23 = sparse_tensor.convert %s2 : tensor<2x3x4xf64, #SingletonTensor2> to tensor<2x3x4xf64, #SingletonTensor3> %t31 = sparse_tensor.convert %s3 : tensor<2x3x4xf64, #SingletonTensor3> to tensor<2x3x4xf64, #SingletonTensor1> - %t32 = sparse_tensor.convert %s3 : tensor<2x3x4xf64, #SingletonTensor3> to tensor<2x3x4xf64, #SingletonTensor2> // // Convert sparse tensor back to dense. // - %d12 = sparse_tensor.convert %t12 : tensor<2x3x4xf64, #SingletonTensor2> to tensor<2x3x4xf64> %d13 = sparse_tensor.convert %t13 : tensor<2x3x4xf64, #SingletonTensor3> to tensor<2x3x4xf64> - %d21 = sparse_tensor.convert %t21 : tensor<2x3x4xf64, #SingletonTensor1> to tensor<2x3x4xf64> - %d23 = sparse_tensor.convert %t23 : tensor<2x3x4xf64, #SingletonTensor3> to tensor<2x3x4xf64> %d31 = sparse_tensor.convert %t31 : tensor<2x3x4xf64, #SingletonTensor1> to tensor<2x3x4xf64> - %d32 = sparse_tensor.convert %t32 : tensor<2x3x4xf64, #SingletonTensor2> to tensor<2x3x4xf64> // // Check round-trip equality. And release dense tensors. // - // CHECK-COUNT-7: ( ( ( 1, 0, 0, 0 ), ( 0, 6, 0, 0 ), ( 0, 0, 11, 0 ) ), ( ( 0, 14, 0, 0 ), ( 0, 0, 0, 20 ), ( 21, 0, 0, 0 ) ) ) + // CHECK-COUNT-3: ( ( ( 1, 0, 0, 0 ), ( 0, 6, 0, 0 ), ( 0, 0, 11, 0 ) ), ( ( 0, 14, 0, 0 ), ( 0, 0, 0, 20 ), ( 21, 0, 0, 0 ) ) ) call @dump(%src) : (tensor<2x3x4xf64>) -> () - call @dump(%d12) : (tensor<2x3x4xf64>) -> () call @dump(%d13) : (tensor<2x3x4xf64>) -> () - call @dump(%d21) : (tensor<2x3x4xf64>) -> () - call @dump(%d23) : (tensor<2x3x4xf64>) -> () call @dump(%d31) : (tensor<2x3x4xf64>) -> () - call @dump(%d32) : (tensor<2x3x4xf64>) -> () // // Release sparse tensors. 
// - bufferization.dealloc_tensor %t12 : tensor<2x3x4xf64, #SingletonTensor2> bufferization.dealloc_tensor %t13 : tensor<2x3x4xf64, #SingletonTensor3> - bufferization.dealloc_tensor %t21 : tensor<2x3x4xf64, #SingletonTensor1> - bufferization.dealloc_tensor %t23 : tensor<2x3x4xf64, #SingletonTensor3> bufferization.dealloc_tensor %t31 : tensor<2x3x4xf64, #SingletonTensor1> - bufferization.dealloc_tensor %t32 : tensor<2x3x4xf64, #SingletonTensor2> bufferization.dealloc_tensor %s1 : tensor<2x3x4xf64, #SingletonTensor1> - bufferization.dealloc_tensor %s2 : tensor<2x3x4xf64, #SingletonTensor2> bufferization.dealloc_tensor %s3 : tensor<2x3x4xf64, #SingletonTensor3> return diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py index 7425a229106ba..ef266672ce42a 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py +++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py @@ -196,13 +196,7 @@ def main(): # CHECK-LABEL: TEST: test_stress print("\nTEST: test_stress") with ir.Context() as ctx, ir.Location.unknown(): - # Disable direct sparse2sparse conversion, because it doubles the time! - # TODO: While direct s2s is far too slow for per-commit testing, - # we should have some framework ensure that we run this test with - # `s2s=0` on a regular basis, to ensure that it does continue to work. - # TODO: be sure to test s2s=0 together with singletons. 
- s2s = 1 - sparsification_options = f"parallelization-strategy=none " f"s2s-strategy={s2s}" + sparsification_options = f"parallelization-strategy=none " compiler = sparse_compiler.SparseCompiler( options=sparsification_options, opt_level=0, shared_libs=[support_lib] ) From ef388334ee5a3584255b9ef5b3fefdb244fa3fd7 Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Thu, 12 Oct 2023 20:22:38 +0000 Subject: [PATCH 020/720] Revert "Reapply "InstCombine: Introduce SimplifyDemandedUseFPClass"" This reverts commit 5a36904c515b. Reverted because this breaks some floating point operations. See the comment on https://github.com/llvm/llvm-project/commit/5a36904c515b. --- clang/test/Headers/__clang_hip_math.hip | 68 ++---- llvm/include/llvm/Analysis/ValueTracking.h | 4 - .../InstCombine/InstCombineInternal.h | 9 - .../InstCombineSimplifyDemanded.cpp | 140 +----------- .../InstCombine/InstructionCombining.cpp | 18 +- .../InstCombine/simplify-demanded-fpclass.ll | 203 +++++++++++------- 6 files changed, 150 insertions(+), 292 deletions(-) diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index 15eccc3b2baba..fc18e14d82296 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -231,8 +231,8 @@ extern "C" __device__ uint64_t test___make_mantissa(const char *p) { // CHECK-LABEL: @test_abs( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) -// CHECK-NEXT: ret i32 [[TMP0]] +// CHECK-NEXT: [[ABS_I:%.*]] = tail call noundef i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) +// CHECK-NEXT: ret i32 [[ABS_I]] // extern "C" __device__ int test_abs(int x) { return abs(x); @@ -240,8 +240,8 @@ extern "C" __device__ int test_abs(int x) { // CHECK-LABEL: @test_labs( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) -// CHECK-NEXT: ret i64 [[TMP0]] +// CHECK-NEXT: [[ABS_I:%.*]] = tail call 
noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) +// CHECK-NEXT: ret i64 [[ABS_I]] // extern "C" __device__ long test_labs(long x) { return labs(x); @@ -249,8 +249,8 @@ extern "C" __device__ long test_labs(long x) { // CHECK-LABEL: @test_llabs( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) -// CHECK-NEXT: ret i64 [[TMP0]] +// CHECK-NEXT: [[ABS_I:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) +// CHECK-NEXT: ret i64 [[ABS_I]] // extern "C" __device__ long long test_llabs(long x) { return llabs(x); @@ -2557,65 +2557,33 @@ extern "C" __device__ double test_nan(const char *tag) { return nan(tag); } -// DEFAULT-LABEL: @test_nanf_emptystr( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: ret float 0x7FF8000000000000 -// -// FINITEONLY-LABEL: @test_nanf_emptystr( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: ret float poison -// -// APPROX-LABEL: @test_nanf_emptystr( -// APPROX-NEXT: entry: -// APPROX-NEXT: ret float 0x7FF8000000000000 +// CHECK-LABEL: @test_nanf_emptystr( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret float 0x7FF8000000000000 // extern "C" __device__ float test_nanf_emptystr() { return nanf(""); } -// DEFAULT-LABEL: @test_nan_emptystr( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: ret double 0x7FF8000000000000 -// -// FINITEONLY-LABEL: @test_nan_emptystr( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: ret double poison -// -// APPROX-LABEL: @test_nan_emptystr( -// APPROX-NEXT: entry: -// APPROX-NEXT: ret double 0x7FF8000000000000 +// CHECK-LABEL: @test_nan_emptystr( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret double 0x7FF8000000000000 // extern "C" __device__ double test_nan_emptystr() { return nan(""); } -// DEFAULT-LABEL: @test_nanf_fill( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: ret float 0x7FF8000000000000 -// -// FINITEONLY-LABEL: @test_nanf_fill( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: ret float poison -// -// APPROX-LABEL: @test_nanf_fill( -// APPROX-NEXT: 
entry: -// APPROX-NEXT: ret float 0x7FF8000000000000 +// CHECK-LABEL: @test_nanf_fill( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret float 0x7FF8000000000000 // extern "C" __device__ float test_nanf_fill() { return nanf("0x456"); } -// DEFAULT-LABEL: @test_nan_fill( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: ret double 0x7FF8000000000000 -// -// FINITEONLY-LABEL: @test_nan_fill( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: ret double poison -// -// APPROX-LABEL: @test_nan_fill( -// APPROX-NEXT: entry: -// APPROX-NEXT: ret double 0x7FF8000000000000 +// CHECK-LABEL: @test_nan_fill( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret double 0x7FF8000000000000 // extern "C" __device__ double test_nan_fill() { return nan("0x123"); diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index d970ffee5db64..25272e0581c93 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -240,10 +240,6 @@ struct KnownFPClass { /// definitely set or false if the sign bit is definitely unset. std::optional SignBit; - bool operator==(KnownFPClass Other) const { - return KnownFPClasses == Other.KnownFPClasses && SignBit == Other.SignBit; - } - /// Return true if it's known this can never be one of the mask entries. 
bool isKnownNever(FPClassTest Mask) const { return (KnownFPClasses & Mask) == fcNone; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 6d72d3ee380d3..83c127a0ef012 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -548,15 +548,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final APInt &UndefElts, unsigned Depth = 0, bool AllowMultipleUsers = false) override; - /// Attempts to replace V with a simpler value based on the demanded - /// floating-point classes - Value *SimplifyDemandedUseFPClass(Value *V, FPClassTest DemandedMask, - KnownFPClass &Known, unsigned Depth, - Instruction *CxtI); - bool SimplifyDemandedFPClass(Instruction *I, unsigned Op, - FPClassTest DemandedMask, KnownFPClass &Known, - unsigned Depth = 0); - /// Canonicalize the position of binops relative to shufflevector. Instruction *foldVectorBinop(BinaryOperator &Inst); Instruction *foldVectorSelect(SelectInst &Sel); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 5b5d2da041f14..be005e61a8d2d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -461,8 +461,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (InputKnown.isNonNegative() || DemandedMask.getActiveBits() <= SrcBitWidth) { // Convert to ZExt cast. 
- CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy); - NewCast->takeName(I); + CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); return InsertNewInstWith(NewCast, I->getIterator()); } @@ -771,7 +770,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0), I->getOperand(1)); LShr->setIsExact(cast(I)->isExact()); - LShr->takeName(I); return InsertNewInstWith(LShr, I->getIterator()); } else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one. Known.One |= HighBits; @@ -1783,139 +1781,3 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, return MadeChange ? I : nullptr; } - -/// For floating-point classes that resolve to a single bit pattern, return that -/// value. -static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) { - switch (Mask) { - case fcPosZero: - return ConstantFP::getZero(Ty); - case fcNegZero: - return ConstantFP::getZero(Ty, true); - case fcPosInf: - return ConstantFP::getInfinity(Ty); - case fcNegInf: - return ConstantFP::getInfinity(Ty, true); - case fcNone: - return PoisonValue::get(Ty); - default: - return nullptr; - } -} - -Value *InstCombinerImpl::SimplifyDemandedUseFPClass( - Value *V, const FPClassTest DemandedMask, KnownFPClass &Known, - unsigned Depth, Instruction *CxtI) { - assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); - Type *VTy = V->getType(); - - assert(Known == KnownFPClass() && "expected uninitialized state"); - - if (DemandedMask == fcNone) - return isa(V) ? nullptr : PoisonValue::get(VTy); - - if (Depth == MaxAnalysisRecursionDepth) - return nullptr; - - Instruction *I = dyn_cast(V); - if (!I) { - // Handle constants and arguments - Known = computeKnownFPClass(V, fcAllFlags, CxtI, Depth + 1); - Value *FoldedToConst = - getFPClassConstant(VTy, DemandedMask & Known.KnownFPClasses); - return FoldedToConst == V ? 
nullptr : FoldedToConst; - } - - if (!I->hasOneUse()) - return nullptr; - - // TODO: Should account for nofpclass/FastMathFlags on current instruction - switch (I->getOpcode()) { - case Instruction::FNeg: { - if (SimplifyDemandedFPClass(I, 0, llvm::fneg(DemandedMask), Known, - Depth + 1)) - return I; - Known.fneg(); - break; - } - case Instruction::Call: { - CallInst *CI = cast(I); - switch (CI->getIntrinsicID()) { - case Intrinsic::fabs: - if (SimplifyDemandedFPClass(I, 0, llvm::inverse_fabs(DemandedMask), Known, - Depth + 1)) - return I; - Known.fabs(); - break; - case Intrinsic::arithmetic_fence: - if (SimplifyDemandedFPClass(I, 0, DemandedMask, Known, Depth + 1)) - return I; - break; - case Intrinsic::copysign: { - // Flip on more potentially demanded classes - const FPClassTest DemandedMaskAnySign = llvm::unknown_sign(DemandedMask); - if (SimplifyDemandedFPClass(I, 0, DemandedMaskAnySign, Known, Depth + 1)) - return I; - - if ((DemandedMask & fcPositive) == fcNone) { - // Roundabout way of replacing with fneg(fabs) - I->setOperand(1, ConstantFP::get(VTy, -1.0)); - return I; - } - - if ((DemandedMask & fcNegative) == fcNone) { - // Roundabout way of replacing with fabs - I->setOperand(1, ConstantFP::getZero(VTy)); - return I; - } - - KnownFPClass KnownSign = - computeKnownFPClass(I->getOperand(1), fcAllFlags, CxtI, Depth + 1); - Known.copysign(KnownSign); - break; - } - default: - Known = computeKnownFPClass(I, ~DemandedMask, CxtI, Depth + 1); - break; - } - - break; - } - case Instruction::Select: { - KnownFPClass KnownLHS, KnownRHS; - if (SimplifyDemandedFPClass(I, 2, DemandedMask, KnownRHS, Depth + 1) || - SimplifyDemandedFPClass(I, 1, DemandedMask, KnownLHS, Depth + 1)) - return I; - - if (KnownLHS.isKnownNever(DemandedMask)) - return I->getOperand(2); - if (KnownRHS.isKnownNever(DemandedMask)) - return I->getOperand(1); - - // TODO: Recognize clamping patterns - Known = KnownLHS | KnownRHS; - break; - } - default: - Known = computeKnownFPClass(I, 
~DemandedMask, CxtI, Depth + 1); - break; - } - - return getFPClassConstant(VTy, DemandedMask & Known.KnownFPClasses); -} - -bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo, - FPClassTest DemandedMask, - KnownFPClass &Known, - unsigned Depth) { - Use &U = I->getOperandUse(OpNo); - Value *NewVal = - SimplifyDemandedUseFPClass(U.get(), DemandedMask, Known, Depth, I); - if (!NewVal) - return false; - if (Instruction *OpInst = dyn_cast(U)) - salvageDebugInfo(*OpInst); - - replaceUse(U, NewVal); - return true; -} diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 4736df40951af..8a6f66e36bd80 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2748,22 +2748,8 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) { } Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { - Value *RetVal = RI.getReturnValue(); - if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(RetVal->getType())) - return nullptr; - - Function *F = RI.getFunction(); - FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass(); - if (ReturnClass == fcNone) - return nullptr; - - KnownFPClass KnownClass; - Value *Simplified = - SimplifyDemandedUseFPClass(RetVal, ~ReturnClass, KnownClass, 0, &RI); - if (!Simplified) - return nullptr; - - return ReturnInst::Create(RI.getContext(), Simplified); + // Nothing for now. + return nullptr; } // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()! 
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll index 4f9396add2370..9817b6e13ca8a 100644 --- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll +++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll @@ -42,7 +42,7 @@ define nofpclass(inf) float @ret_nofpclass_inf_undef() { define nofpclass(all) float @ret_nofpclass_all_var(float %arg) { ; CHECK-LABEL: define nofpclass(all) float @ret_nofpclass_all_var ; CHECK-SAME: (float [[ARG:%.*]]) { -; CHECK-NEXT: ret float poison +; CHECK-NEXT: ret float [[ARG]] ; ret float %arg } @@ -51,7 +51,7 @@ define nofpclass(all) float @ret_nofpclass_all_var(float %arg) { define nofpclass(all) <2 x float> @ret_nofpclass_all_var_vector(<2 x float> %arg) { ; CHECK-LABEL: define nofpclass(all) <2 x float> @ret_nofpclass_all_var_vector ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { -; CHECK-NEXT: ret <2 x float> poison +; CHECK-NEXT: ret <2 x float> [[ARG]] ; ret <2 x float> %arg } @@ -65,14 +65,14 @@ define nofpclass(inf) float @ret_nofpclass_inf__0() { define nofpclass(inf) float @ret_nofpclass_inf__pinf() { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__pinf() { -; CHECK-NEXT: ret float poison +; CHECK-NEXT: ret float 0x7FF0000000000000 ; ret float 0x7FF0000000000000 } define nofpclass(pinf) float @ret_nofpclass_pinf__pinf() { ; CHECK-LABEL: define nofpclass(pinf) float @ret_nofpclass_pinf__pinf() { -; CHECK-NEXT: ret float poison +; CHECK-NEXT: ret float 0x7FF0000000000000 ; ret float 0x7FF0000000000000 } @@ -86,7 +86,7 @@ define nofpclass(pinf) float @ret_nofpclass_pinf__ninf() { define nofpclass(inf) float @ret_nofpclass_inf__ninf() { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__ninf() { -; CHECK-NEXT: ret float poison +; CHECK-NEXT: ret float 0xFFF0000000000000 ; ret float 0xFFF0000000000000 } @@ -106,7 +106,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__select_nofpclass_inf_lhs(i1 
%con define nofpclass(inf) float @ret_nofpclass_inf__select_nofpclass_arg_only_inf_lhs(i1 %cond, float nofpclass(nan norm zero sub) %x, float %y) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__select_nofpclass_arg_only_inf_lhs ; CHECK-SAME: (i1 [[COND:%.*]], float nofpclass(nan zero sub norm) [[X:%.*]], float [[Y:%.*]]) { -; CHECK-NEXT: ret float [[Y]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float [[Y]] +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float %x, float %y ret float %select @@ -116,7 +117,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__select_nofpclass_arg_only_inf_lh define nofpclass(inf) float @ret_nofpclass_inf__select_nofpclass_arg_only_inf_rhs(i1 %cond, float %x, float nofpclass(nan norm zero sub) %y) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__select_nofpclass_arg_only_inf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float nofpclass(nan zero sub norm) [[Y:%.*]]) { -; CHECK-NEXT: ret float [[X]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float [[Y]] +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float %x, float %y ret float %select @@ -126,7 +128,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__select_nofpclass_arg_only_inf_rh define nofpclass(inf) [3 x [2 x float]] @ret_float_array(i1 %cond, [3 x [2 x float]] nofpclass(nan norm zero sub) %x, [3 x [2 x float]] %y) { ; CHECK-LABEL: define nofpclass(inf) [3 x [2 x float]] @ret_float_array ; CHECK-SAME: (i1 [[COND:%.*]], [3 x [2 x float]] nofpclass(nan zero sub norm) [[X:%.*]], [3 x [2 x float]] [[Y:%.*]]) { -; CHECK-NEXT: ret [3 x [2 x float]] [[Y]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], [3 x [2 x float]] [[X]], [3 x [2 x float]] [[Y]] +; CHECK-NEXT: ret [3 x [2 x float]] [[SELECT]] ; %select = select i1 %cond, [3 x [2 x float]] %x, [3 x [2 x float]] %y ret [3 x [2 x float ]] %select @@ -136,7 +139,8 @@ define nofpclass(inf) [3 x [2 x float]] 
@ret_float_array(i1 %cond, [3 x [2 x flo define nofpclass(inf) float @ret_nofpclass_inf__select_pinf_lhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__select_pinf_lhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float [[X]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float 0x7FF0000000000000, float [[X]] +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float 0x7FF0000000000000, float %x ret float %select @@ -146,7 +150,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__select_pinf_lhs(i1 %cond, float define nofpclass(inf) float @ret_nofpclass_inf__select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float [[X]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 ret float %select @@ -156,7 +161,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__select_pinf_rhs(i1 %cond, float define nofpclass(inf) float @ret_nofpclass_inf__select_pinf_or_ninf(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__select_pinf_or_ninf ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float poison +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float 0x7FF0000000000000, float 0xFFF0000000000000 +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float 0x7FF0000000000000, float 0xFFF0000000000000 ret float %select @@ -166,7 +172,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__select_pinf_or_ninf(i1 %cond, fl define nofpclass(inf) float @ret_nofpclass_inf__select_ninf_or_pinf(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__select_ninf_or_pinf ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float 
poison +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float 0xFFF0000000000000, float 0x7FF0000000000000 +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float 0xFFF0000000000000, float 0x7FF0000000000000 ret float %select @@ -176,7 +183,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__select_ninf_or_pinf(i1 %cond, fl define nofpclass(ninf) float @ret_nofpclass_ninf__select_ninf_or_pinf(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(ninf) float @ret_nofpclass_ninf__select_ninf_or_pinf ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float 0x7FF0000000000000 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float 0xFFF0000000000000, float 0x7FF0000000000000 +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float 0xFFF0000000000000, float 0x7FF0000000000000 ret float %select @@ -186,7 +194,8 @@ define nofpclass(ninf) float @ret_nofpclass_ninf__select_ninf_or_pinf(i1 %cond, define nofpclass(pinf) float @ret_nofpclass_pinf__select_ninf_or_pinf(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(pinf) float @ret_nofpclass_pinf__select_ninf_or_pinf ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float 0xFFF0000000000000 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float 0xFFF0000000000000, float 0x7FF0000000000000 +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float 0xFFF0000000000000, float 0x7FF0000000000000 ret float %select @@ -196,7 +205,8 @@ define nofpclass(pinf) float @ret_nofpclass_pinf__select_ninf_or_pinf(i1 %cond, define nofpclass(zero) float @ret_nofpclass_zero__select_pzero_or_nzero(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(zero) float @ret_nofpclass_zero__select_pzero_or_nzero ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float poison +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float 0.000000e+00, float -0.000000e+00 +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float 0.0, 
float -0.0 ret float %select @@ -206,7 +216,8 @@ define nofpclass(zero) float @ret_nofpclass_zero__select_pzero_or_nzero(i1 %cond define nofpclass(nzero) float @ret_nofpclass_nzero__select_pzero_or_nzero(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(nzero) float @ret_nofpclass_nzero__select_pzero_or_nzero ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float 0.000000e+00 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float 0.000000e+00, float -0.000000e+00 +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float 0.0, float -0.0 ret float %select @@ -216,7 +227,8 @@ define nofpclass(nzero) float @ret_nofpclass_nzero__select_pzero_or_nzero(i1 %co define nofpclass(pzero) float @ret_nofpclass_pzero__select_pzero_or_nzero(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(pzero) float @ret_nofpclass_pzero__select_pzero_or_nzero ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float -0.000000e+00 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float 0.000000e+00, float -0.000000e+00 +; CHECK-NEXT: ret float [[SELECT]] ; %select = select i1 %cond, float 0.0, float -0.0 ret float %select @@ -226,7 +238,8 @@ define nofpclass(pzero) float @ret_nofpclass_pzero__select_pzero_or_nzero(i1 %co define nofpclass(inf) <2 x float> @ret_nofpclass_inf__select_pinf_lhs_vector(<2 x i1> %cond, <2 x float> %x) { ; CHECK-LABEL: define nofpclass(inf) <2 x float> @ret_nofpclass_inf__select_pinf_lhs_vector ; CHECK-SAME: (<2 x i1> [[COND:%.*]], <2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> [[X]] +; CHECK-NEXT: [[SELECT:%.*]] = select <2 x i1> [[COND]], <2 x float> , <2 x float> [[X]] +; CHECK-NEXT: ret <2 x float> [[SELECT]] ; %select = select <2 x i1> %cond, <2 x float> , <2 x float> %x ret <2 x float> %select @@ -236,7 +249,8 @@ define nofpclass(inf) <2 x float> @ret_nofpclass_inf__select_pinf_lhs_vector(<2 define nofpclass(inf) <2 x float> @ret_nofpclass_inf__select_pinf_lhs_vector_undef(<2 x i1> %cond, <2 
x float> %x) { ; CHECK-LABEL: define nofpclass(inf) <2 x float> @ret_nofpclass_inf__select_pinf_lhs_vector_undef ; CHECK-SAME: (<2 x i1> [[COND:%.*]], <2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> [[X]] +; CHECK-NEXT: [[SELECT:%.*]] = select <2 x i1> [[COND]], <2 x float> , <2 x float> [[X]] +; CHECK-NEXT: ret <2 x float> [[SELECT]] ; %select = select <2 x i1> %cond, <2 x float> , <2 x float> %x ret <2 x float> %select @@ -246,7 +260,8 @@ define nofpclass(inf) <2 x float> @ret_nofpclass_inf__select_pinf_lhs_vector_und define nofpclass(inf) <2 x float> @ret_nofpclass_inf__select_mixed_inf_lhs_vector(<2 x i1> %cond, <2 x float> %x) { ; CHECK-LABEL: define nofpclass(inf) <2 x float> @ret_nofpclass_inf__select_mixed_inf_lhs_vector ; CHECK-SAME: (<2 x i1> [[COND:%.*]], <2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> [[X]] +; CHECK-NEXT: [[SELECT:%.*]] = select <2 x i1> [[COND]], <2 x float> , <2 x float> [[X]] +; CHECK-NEXT: ret <2 x float> [[SELECT]] ; %select = select <2 x i1> %cond, <2 x float> , <2 x float> %x ret <2 x float> %select @@ -312,7 +327,8 @@ define nofpclass(nan) float @ret_nofpclass_nan__select_pinf_rhs(i1 %cond, float define nofpclass(inf nan) float @ret_nofpclass_inf_nan__select_chain_inf_nan_0(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(nan inf) float @ret_nofpclass_inf_nan__select_chain_inf_nan_0 ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float [[X]] +; CHECK-NEXT: [[SELECT1:%.*]] = select i1 [[COND]], float 0x7FF0000000000000, float [[X]] +; CHECK-NEXT: ret float [[SELECT1]] ; %select0 = select i1 %cond, float 0x7FF8000000000000, float %x %select1 = select i1 %cond, float 0x7FF0000000000000, float %select0 @@ -322,7 +338,8 @@ define nofpclass(inf nan) float @ret_nofpclass_inf_nan__select_chain_inf_nan_0(i define nofpclass(inf nan) float @ret_nofpclass_inf_nan__select_chain_inf_nan_1(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(nan inf) float 
@ret_nofpclass_inf_nan__select_chain_inf_nan_1 ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float poison +; CHECK-NEXT: [[SELECT1:%.*]] = select i1 [[COND]], float 0x7FF0000000000000, float 0x7FF8000000000000 +; CHECK-NEXT: ret float [[SELECT1]] ; %select0 = select i1 %cond, float %x, float 0x7FF8000000000000 %select1 = select i1 %cond, float 0x7FF0000000000000, float %select0 @@ -343,7 +360,8 @@ define nofpclass(nan) float @ret_nofpclass_nan__select_chain_inf_nan(i1 %cond, f define nofpclass(inf) float @ret_nofpclass_inf__select_chain_inf_nan_0(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__select_chain_inf_nan_0 ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float [[X]] +; CHECK-NEXT: [[SELECT1:%.*]] = select i1 [[COND]], float 0x7FF0000000000000, float [[X]] +; CHECK-NEXT: ret float [[SELECT1]] ; %select0 = select i1 %cond, float 0x7FF8000000000000, float %x %select1 = select i1 %cond, float 0x7FF0000000000000, float %select0 @@ -353,7 +371,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__select_chain_inf_nan_0(i1 %cond, define nofpclass(inf) float @ret_nofpclass_inf__select_chain_inf_nan_1(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__select_chain_inf_nan_1 ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float 0x7FF8000000000000 +; CHECK-NEXT: [[SELECT1:%.*]] = select i1 [[COND]], float 0x7FF8000000000000, float 0x7FF0000000000000 +; CHECK-NEXT: ret float [[SELECT1]] ; %select0 = select i1 %cond, float 0x7FF8000000000000, float %x %select1 = select i1 %cond, float %select0, float 0x7FF0000000000000 @@ -364,7 +383,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__select_chain_inf_nan_1(i1 %cond, define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_ninf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_ninf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float 
[[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0xFFF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) ; CHECK-NEXT: ret float [[FABS]] ; %select = select i1 %cond, float %x, float 0xFFF0000000000000 @@ -376,7 +396,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_ninf_rhs(i1 %cond, f define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) ; CHECK-NEXT: ret float [[FABS]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -400,7 +421,8 @@ define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_no_negatives__fabs_ define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_no_positives__fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_no_positives__fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) ; CHECK-NEXT: ret float [[FABS]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -424,7 +446,9 @@ define nofpclass(nan ninf nnorm nsub nzero) float @ret_nofpclass_no_negatives_na define nofpclass(nan pinf pnorm psub pzero) float @ret_nofpclass_no_positives_nan__fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) 
float @ret_nofpclass_no_positives_nan__fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float poison +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) +; CHECK-NEXT: ret float [[FABS]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -435,7 +459,8 @@ define nofpclass(nan pinf pnorm psub pzero) float @ret_nofpclass_no_positives_na define nofpclass(inf) float @ret_nofpclass_inf__fneg_select_ninf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fneg_select_ninf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0xFFF0000000000000 +; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[SELECT]] ; CHECK-NEXT: ret float [[FNEG]] ; %select = select i1 %cond, float %x, float 0xFFF0000000000000 @@ -447,7 +472,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__fneg_select_ninf_rhs(i1 %cond, f define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___fneg_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) float @ret_nofpclass_nonegatives_noinf___fneg_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[SELECT]] ; CHECK-NEXT: ret float [[FNEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -459,7 +485,8 @@ define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___ define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___fneg_select_ninf_lhs(i1 %cond, float %x) { ; CHECK-LABEL: define 
nofpclass(inf nzero nsub nnorm) float @ret_nofpclass_nonegatives_noinf___fneg_select_ninf_lhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float 0xFFF0000000000000, float [[X]] +; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[SELECT]] ; CHECK-NEXT: ret float [[FNEG]] ; %select = select i1 %cond, float 0xFFF0000000000000, float %x @@ -483,7 +510,8 @@ define nofpclass(pzero psub pnorm pinf) float @ret_nofpclass_nopositives___fneg_ define nofpclass(inf) float @ret_nofpclass_inf__fneg_fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fneg_fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) ; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[FABS]] ; CHECK-NEXT: ret float [[FNEG]] ; @@ -497,7 +525,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__fneg_fabs_select_pinf_rhs(i1 %co define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives__fneg_fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_nofpclass_nonegatives__fneg_fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) ; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[FABS]] ; CHECK-NEXT: ret float [[FNEG]] ; @@ -512,7 +541,10 @@ define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives__fneg_f define nofpclass(nan ninf nnorm nsub nzero) float 
@ret_nofpclass_nonegatives_nonan__fneg_fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_nofpclass_nonegatives_nonan__fneg_fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: ret float poison +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) +; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[FABS]] +; CHECK-NEXT: ret float [[FNEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -524,7 +556,8 @@ define nofpclass(nan ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives_non define nofpclass(inf) float @ret_nofpclass_inf__copysign_unknown_select_pinf_rhs(i1 %cond, float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X]], float [[UNKNOWN_SIGN]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[SELECT]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -535,7 +568,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__copysign_unknown_select_pinf_rhs define nofpclass(inf) float @ret_nofpclass_inf__copysign_positive_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__copysign_positive_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[COPYSIGN:%.*]] = 
call float @llvm.fabs.f32(float [[SELECT]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -546,7 +580,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__copysign_positive_select_pinf_rh define nofpclass(inf) float @ret_nofpclass_inf__copysign_negative_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__copysign_negative_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) ; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] ; CHECK-NEXT: ret float [[COPYSIGN]] ; @@ -559,8 +594,7 @@ define nofpclass(inf) float @ret_nofpclass_inf__copysign_negative_select_pinf_rh define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_nopositives_copysign(float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_nopositives_copysign ; CHECK-SAME: (float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %copysign = call float @llvm.copysign.f32(float %x, float %unknown.sign) @@ -571,8 +605,7 @@ define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_nopositives_copysig define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_nopositives_copysign_nnan_flag(float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_nopositives_copysign_nnan_flag ; CHECK-SAME: (float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call nnan float @llvm.fabs.f32(float 
[[X]]) -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg nnan float [[TMP1]] +; CHECK-NEXT: [[COPYSIGN:%.*]] = call nnan float @llvm.copysign.f32(float [[X]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %copysign = call nnan float @llvm.copysign.f32(float %x, float %unknown.sign) @@ -583,8 +616,7 @@ define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_nopositives_copysig define nofpclass(nan pinf pnorm psub pzero) float @ret_nofpclass_nopositives_nonan_copysign(float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) float @ret_nofpclass_nopositives_nonan_copysign ; CHECK-SAME: (float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %copysign = call float @llvm.copysign.f32(float %x, float %unknown.sign) @@ -595,7 +627,7 @@ define nofpclass(nan pinf pnorm psub pzero) float @ret_nofpclass_nopositives_non define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives_copysign(float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_nofpclass_nonegatives_copysign ; CHECK-SAME: (float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %copysign = call float @llvm.copysign.f32(float %x, float %unknown.sign) @@ -606,7 +638,7 @@ define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives_copysig define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives_copysign_nnan_flag(float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float 
@ret_nofpclass_nonegatives_copysign_nnan_flag ; CHECK-SAME: (float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call nnan float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[COPYSIGN:%.*]] = call nnan float @llvm.copysign.f32(float [[X]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %copysign = call nnan float @llvm.copysign.f32(float %x, float %unknown.sign) @@ -617,7 +649,7 @@ define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives_copysig define nofpclass(nan ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives_nonan_copysign(float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_nofpclass_nonegatives_nonan_copysign ; CHECK-SAME: (float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %copysign = call float @llvm.copysign.f32(float %x, float %unknown.sign) @@ -627,7 +659,8 @@ define nofpclass(nan ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives_non define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_nopositives__copysign_fabs_select_pinf_rhs(i1 %cond, float %x, float %sign) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_nopositives__copysign_fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[SIGN:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -640,7 +673,8 @@ define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_nopositives__copysi define nofpclass(inf nnorm nsub 
nzero) float @ret_nofpclass_no_negatives_noinf__copysign_unknown_select_pinf_rhs(i1 %cond, float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) float @ret_nofpclass_no_negatives_noinf__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[SELECT]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -652,8 +686,8 @@ define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_no_negatives_noinf__ define nofpclass(inf pnorm psub pzero) float @ret_nofpclass_no_positives_noinf__copysign_unknown_select_pinf_rhs(i1 %cond, float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(inf pzero psub pnorm) float @ret_nofpclass_no_positives_noinf__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[SELECT]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -666,7 +700,7 @@ define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_no_negatives__copys ; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_nofpclass_no_negatives__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 -; CHECK-NEXT: 
[[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[SELECT]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -679,8 +713,7 @@ define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_no_positives__copys ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_no_positives__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[SELECT]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -693,7 +726,7 @@ define nofpclass(nan ninf nnorm nsub nzero) float @ret_nofpclass_no_negatives_no ; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_nofpclass_no_negatives_nonan__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[SELECT]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -706,8 +739,7 @@ define nofpclass(nan pinf pnorm psub pzero) float @ret_nofpclass_no_positives_no ; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) float @ret_nofpclass_no_positives_nonan__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { ; 
CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[SELECT]], float [[UNKNOWN_SIGN]]) ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -758,7 +790,9 @@ define nofpclass(nan ninf nnorm nsub nzero) float @ret_nofpclass_nan_negatives__ define nofpclass(nan ninf nnorm nsub zero) float @ret_nofpclass_nan_negatives_zero__select_clamp_pos_to_zero(float %x) { ; CHECK-LABEL: define nofpclass(nan ninf zero nsub nnorm) float @ret_nofpclass_nan_negatives_zero__select_clamp_pos_to_zero ; CHECK-SAME: (float [[X:%.*]]) { -; CHECK-NEXT: ret float [[X]] +; CHECK-NEXT: [[IS_GT_ZERO:%.*]] = fcmp ogt float [[X]], 0.000000e+00 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_GT_ZERO]], float 0.000000e+00, float [[X]] +; CHECK-NEXT: ret float [[SELECT]] ; %is.gt.zero = fcmp ogt float %x, 0.0 %select = select i1 %is.gt.zero, float 0.0, float %x @@ -769,7 +803,9 @@ define nofpclass(nan ninf nnorm nsub zero) float @ret_nofpclass_nan_negatives_ze define nofpclass(ninf nnorm nsub zero) float @ret_nofpclass_negatives_zero__select_clamp_pos_to_zero(float %x) { ; CHECK-LABEL: define nofpclass(ninf zero nsub nnorm) float @ret_nofpclass_negatives_zero__select_clamp_pos_to_zero ; CHECK-SAME: (float [[X:%.*]]) { -; CHECK-NEXT: ret float [[X]] +; CHECK-NEXT: [[IS_GT_ZERO:%.*]] = fcmp ogt float [[X]], 0.000000e+00 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_GT_ZERO]], float 0.000000e+00, float [[X]] +; CHECK-NEXT: ret float [[SELECT]] ; %is.gt.zero = fcmp ogt float %x, 0.0 %select = select i1 %is.gt.zero, float 0.0, float %x @@ -783,7 +819,8 @@ define nofpclass(inf) float @ret_nofpclass_noinfs__assumed_isinf__select_pinf_lh ; CHECK-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) ; CHECK-NEXT: 
[[X_IS_INF:%.*]] = fcmp oeq float [[FABS_X]], 0x7FF0000000000000 ; CHECK-NEXT: call void @llvm.assume(i1 [[X_IS_INF]]) -; CHECK-NEXT: ret float [[Y]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float [[Y]] +; CHECK-NEXT: ret float [[SELECT]] ; %fabs.x = call float @llvm.fabs.f32(float %x) %x.is.inf = fcmp oeq float %fabs.x, 0x7FF0000000000000 @@ -801,13 +838,18 @@ define nofpclass(nan inf nzero nsub nnorm) float @powr_issue64870(float nofpclas ; CHECK-NEXT: [[I1:%.*]] = tail call float @llvm.log2.f32(float [[I]]) ; CHECK-NEXT: [[I2:%.*]] = fmul float [[I1]], [[Y]] ; CHECK-NEXT: [[I3:%.*]] = tail call nofpclass(ninf nzero nsub nnorm) float @llvm.exp2.f32(float [[I2]]) +; CHECK-NEXT: [[I4:%.*]] = fcmp olt float [[Y]], 0.000000e+00 +; CHECK-NEXT: [[I5:%.*]] = select i1 [[I4]], float 0x7FF0000000000000, float 0.000000e+00 ; CHECK-NEXT: [[I6:%.*]] = fcmp oeq float [[X]], 0.000000e+00 -; CHECK-NEXT: [[I7:%.*]] = select i1 [[I6]], float 0.000000e+00, float [[I3]] +; CHECK-NEXT: [[I7:%.*]] = select i1 [[I6]], float [[I5]], float [[I3]] ; CHECK-NEXT: [[I8:%.*]] = fcmp oeq float [[Y]], 0.000000e+00 +; CHECK-NEXT: [[I9:%.*]] = select i1 [[I6]], float 0x7FF8000000000000, float 1.000000e+00 +; CHECK-NEXT: [[I10:%.*]] = select i1 [[I8]], float [[I9]], float [[I7]] ; CHECK-NEXT: [[I11:%.*]] = fcmp oeq float [[X]], 1.000000e+00 -; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[I11]], i1 true, i1 [[I8]] -; CHECK-NEXT: [[I12:%.*]] = select i1 [[TMP0]], float 1.000000e+00, float [[I7]] -; CHECK-NEXT: ret float [[I12]] +; CHECK-NEXT: [[I12:%.*]] = select i1 [[I11]], float 1.000000e+00, float [[I10]] +; CHECK-NEXT: [[I13:%.*]] = fcmp olt float [[X]], 0.000000e+00 +; CHECK-NEXT: [[I14:%.*]] = select i1 [[I13]], float 0x7FF8000000000000, float [[I12]] +; CHECK-NEXT: ret float [[I14]] ; entry: %i = tail call float @llvm.fabs.f32(float %x) @@ -839,8 +881,12 @@ define nofpclass(nan inf nzero nsub nnorm) float @test_powr_issue64870_2(float n ; CHECK-NEXT: [[I4:%.*]] = select i1 
[[I]], float 0x7FF8000000000000, float [[ARG1]] ; CHECK-NEXT: [[I5:%.*]] = fmul float [[I4]], [[I3]] ; CHECK-NEXT: [[I6:%.*]] = tail call noundef nofpclass(ninf nzero nsub nnorm) float @llvm.exp2.f32(float noundef [[I5]]) +; CHECK-NEXT: [[I7:%.*]] = fcmp olt float [[I4]], 0.000000e+00 +; CHECK-NEXT: [[I8:%.*]] = select i1 [[I7]], float 0x7FF0000000000000, float 0.000000e+00 +; CHECK-NEXT: [[I9:%.*]] = fcmp ueq float [[I4]], 0.000000e+00 ; CHECK-NEXT: [[I10:%.*]] = fcmp oeq float [[I2]], 0.000000e+00 -; CHECK-NEXT: [[I12:%.*]] = select i1 [[I10]], float 0.000000e+00, float [[I6]] +; CHECK-NEXT: [[I11:%.*]] = select i1 [[I9]], float 0x7FF8000000000000, float [[I8]] +; CHECK-NEXT: [[I12:%.*]] = select i1 [[I10]], float [[I11]], float [[I6]] ; CHECK-NEXT: ret float [[I12]] ; bb: @@ -877,10 +923,16 @@ define nofpclass(nan inf) float @pow_f32(float nofpclass(nan inf) %arg, float no ; CHECK-NEXT: [[I11:%.*]] = and i1 [[I7]], [[I10]] ; CHECK-NEXT: [[I12:%.*]] = select i1 [[I11]], float [[ARG]], float 1.000000e+00 ; CHECK-NEXT: [[I13:%.*]] = tail call noundef float @llvm.copysign.f32(float noundef [[I4]], float noundef [[I12]]) +; CHECK-NEXT: [[I14:%.*]] = fcmp olt float [[ARG]], 0.000000e+00 +; CHECK-NEXT: [[I15:%.*]] = select i1 [[I7]], float [[I13]], float 0x7FF8000000000000 +; CHECK-NEXT: [[I16:%.*]] = select i1 [[I14]], float [[I15]], float [[I13]] ; CHECK-NEXT: [[I17:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00 +; CHECK-NEXT: [[I18:%.*]] = fcmp olt float [[ARG1]], 0.000000e+00 +; CHECK-NEXT: [[I19:%.*]] = xor i1 [[I17]], [[I18]] +; CHECK-NEXT: [[I20:%.*]] = select i1 [[I19]], float 0.000000e+00, float 0x7FF0000000000000 ; CHECK-NEXT: [[I21:%.*]] = select i1 [[I11]], float [[ARG]], float 0.000000e+00 -; CHECK-NEXT: [[I22:%.*]] = tail call noundef nofpclass(nan sub norm) float @llvm.copysign.f32(float noundef 0.000000e+00, float noundef [[I21]]) -; CHECK-NEXT: [[I23:%.*]] = select i1 [[I17]], float [[I22]], float [[I13]] +; CHECK-NEXT: [[I22:%.*]] = tail call noundef 
nofpclass(nan sub norm) float @llvm.copysign.f32(float noundef [[I20]], float noundef [[I21]]) +; CHECK-NEXT: [[I23:%.*]] = select i1 [[I17]], float [[I22]], float [[I16]] ; CHECK-NEXT: [[I24:%.*]] = fcmp oeq float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[I25:%.*]] = fcmp oeq float [[ARG1]], 0.000000e+00 ; CHECK-NEXT: [[I26:%.*]] = or i1 [[I24]], [[I25]] @@ -925,7 +977,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__select_nofpclass_call_only_inf(i ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__select_nofpclass_call_only_inf ; CHECK-SAME: (i1 [[COND:%.*]], float [[Y:%.*]]) { ; CHECK-NEXT: [[MUST_BE_INF:%.*]] = call nofpclass(nan zero sub norm) float @extern() -; CHECK-NEXT: ret float [[Y]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[MUST_BE_INF]], float [[Y]] +; CHECK-NEXT: ret float [[SELECT]] ; %must.be.inf = call nofpclass(nan norm zero sub) float @extern() %select = select i1 %cond, float %must.be.inf, float %y @@ -936,7 +989,7 @@ define nofpclass(pinf) float @ret_nofpclass_pinf__nofpclass_call_only_inf(i1 %co ; CHECK-LABEL: define nofpclass(pinf) float @ret_nofpclass_pinf__nofpclass_call_only_inf ; CHECK-SAME: (i1 [[COND:%.*]], float [[Y:%.*]]) { ; CHECK-NEXT: [[MUST_BE_INF:%.*]] = call nofpclass(nan zero sub norm) float @extern() -; CHECK-NEXT: ret float 0xFFF0000000000000 +; CHECK-NEXT: ret float [[MUST_BE_INF]] ; %must.be.inf = call nofpclass(nan norm zero sub) float @extern() ret float %must.be.inf @@ -946,7 +999,7 @@ define nofpclass(ninf) float @ret_nofpclass_ninf__nofpclass_call_only_inf(i1 %co ; CHECK-LABEL: define nofpclass(ninf) float @ret_nofpclass_ninf__nofpclass_call_only_inf ; CHECK-SAME: (i1 [[COND:%.*]], float [[Y:%.*]]) { ; CHECK-NEXT: [[MUST_BE_INF:%.*]] = call nofpclass(nan zero sub norm) float @extern() -; CHECK-NEXT: ret float 0x7FF0000000000000 +; CHECK-NEXT: ret float [[MUST_BE_INF]] ; %must.be.inf = call nofpclass(nan norm zero sub) float @extern() ret float %must.be.inf @@ -956,7 +1009,7 @@ define 
nofpclass(nzero) float @ret_nofpclass_nzero__nofpclass_call_only_zero(i1 ; CHECK-LABEL: define nofpclass(nzero) float @ret_nofpclass_nzero__nofpclass_call_only_zero ; CHECK-SAME: (i1 [[COND:%.*]], float [[Y:%.*]]) { ; CHECK-NEXT: [[MUST_BE_ZERO:%.*]] = call nofpclass(nan inf sub norm) float @extern() -; CHECK-NEXT: ret float 0.000000e+00 +; CHECK-NEXT: ret float [[MUST_BE_ZERO]] ; %must.be.zero = call nofpclass(nan sub norm inf) float @extern() ret float %must.be.zero @@ -966,7 +1019,7 @@ define nofpclass(pzero) float @ret_nofpclass_pzero__nofpclass_call_only_zero(i1 ; CHECK-LABEL: define nofpclass(pzero) float @ret_nofpclass_pzero__nofpclass_call_only_zero ; CHECK-SAME: (i1 [[COND:%.*]], float [[Y:%.*]]) { ; CHECK-NEXT: [[MUST_BE_ZERO:%.*]] = call nofpclass(nan inf sub norm) float @extern() -; CHECK-NEXT: ret float -0.000000e+00 +; CHECK-NEXT: ret float [[MUST_BE_ZERO]] ; %must.be.zero = call nofpclass(nan sub norm inf) float @extern() ret float %must.be.zero @@ -1080,7 +1133,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__recursive_phi_0(i1 %cond0, float ; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @loop.cond() ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[RET]], label [[LOOP]] ; CHECK: ret: -; CHECK-NEXT: ret float 0.000000e+00 +; CHECK-NEXT: [[PHI_RET:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0x7FF0000000000000, [[LOOP]] ] +; CHECK-NEXT: ret float [[PHI_RET]] ; entry: br i1 %cond0, label %loop, label %ret @@ -1105,7 +1159,7 @@ define nofpclass(inf) float @ret_nofpclass_inf__recursive_phi_1(i1 %cond0, float ; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @loop.cond() ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[RET]], label [[LOOP]] ; CHECK: ret: -; CHECK-NEXT: ret float poison +; CHECK-NEXT: ret float 0x7FF0000000000000 ; entry: br i1 %cond0, label %loop, label %ret @@ -1157,7 +1211,8 @@ ret: define nofpclass(inf) float @ret_nofpclass_inf__arithmetic_fence_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float 
@ret_nofpclass_inf__arithmetic_fence_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FENCE:%.*]] = call float @llvm.arithmetic.fence.f32(float [[X]]) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 +; CHECK-NEXT: [[FENCE:%.*]] = call float @llvm.arithmetic.fence.f32(float [[SELECT]]) ; CHECK-NEXT: ret float [[FENCE]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 From 2ae3a712304870adf639a33547c1139a7b6304e5 Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Thu, 12 Oct 2023 20:34:31 +0000 Subject: [PATCH 021/720] Fix minimal-throw-catch.ll on x86 mac It looks like this broke after https://reviews.llvm.org/D86310 and the data layout just needs to be updated for this test. --- llvm/test/ExecutionEngine/OrcLazy/minimal-throw-catch.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/ExecutionEngine/OrcLazy/minimal-throw-catch.ll b/llvm/test/ExecutionEngine/OrcLazy/minimal-throw-catch.ll index b1e0aea05a5f1..7bbaa0575a387 100644 --- a/llvm/test/ExecutionEngine/OrcLazy/minimal-throw-catch.ll +++ b/llvm/test/ExecutionEngine/OrcLazy/minimal-throw-catch.ll @@ -4,7 +4,7 @@ ; Basic correctness testing for eh-frame processing and registration. 
source_filename = "minimal-throw-catch.cpp" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.14.0" @_ZTIi = external constant ptr From 9427fce6778c8d01a0519cd0382a0ae6a75b2d35 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Thu, 5 Oct 2023 11:17:14 -0500 Subject: [PATCH 022/720] [ValueTracking] Add tests for `cmpExcludesZero` for non-splat vecs; NFC --- .../Analysis/ValueTracking/known-non-zero.ll | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll index 6dce6e528165e..f64303f173015 100644 --- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll +++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll @@ -1160,3 +1160,65 @@ define i1 @sdiv_known_non_zero_fail(i8 %x, i8 %y) { %nz = icmp ne i8 %xy, 0 ret i1 %nz } + +define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: @cmp_excludes_zero_with_nonsplat_vec( +; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[A]], <2 x i8> +; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[S]], [[B:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[AND]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %c = icmp sge <2 x i8> %a, + %s = select <2 x i1> %c, <2 x i8> %a, <2 x i8> + %and = or <2 x i8> %s, %b + %r = icmp eq <2 x i8> %and, zeroinitializer + ret <2 x i1> %r +} + +define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec_wundef(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: @cmp_excludes_zero_with_nonsplat_vec_wundef( +; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[A]], <2 x i8> +; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[S]], [[B:%.*]] +; CHECK-NEXT: 
[[R:%.*]] = icmp eq <2 x i8> [[AND]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %c = icmp sge <2 x i8> %a, + %s = select <2 x i1> %c, <2 x i8> %a, <2 x i8> + %and = or <2 x i8> %s, %b + %r = icmp eq <2 x i8> %and, zeroinitializer + ret <2 x i1> %r +} + +define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec_wpoison(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: @cmp_excludes_zero_with_nonsplat_vec_wpoison( +; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[A]], <2 x i8> +; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[S]], [[B:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[AND]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %c = icmp sge <2 x i8> %a, + %s = select <2 x i1> %c, <2 x i8> %a, <2 x i8> + %and = or <2 x i8> %s, %b + %r = icmp eq <2 x i8> %and, zeroinitializer + ret <2 x i1> %r +} + + +define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec_fail(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: @cmp_excludes_zero_with_nonsplat_vec_fail( +; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[A]], <2 x i8> +; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[S]], [[B:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[AND]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %c = icmp sge <2 x i8> %a, + %s = select <2 x i1> %c, <2 x i8> %a, <2 x i8> + %and = or <2 x i8> %s, %b + %r = icmp eq <2 x i8> %and, zeroinitializer + ret <2 x i1> %r +} + From dfda65c89272eb90c0377f6c15ad134fc902dab6 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Thu, 5 Oct 2023 21:16:34 -0500 Subject: [PATCH 023/720] [ValueTracking] Add support for non-splat vecs in cmpExcludesZero Just a small QOL change. 
--- llvm/lib/Analysis/ValueTracking.cpp | 19 ++++++++++++++++--- .../Analysis/ValueTracking/known-non-zero.ll | 6 +----- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 11b39751b542f..2b0bbe6f1f434 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -567,11 +567,24 @@ static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) { // All other predicates - rely on generic ConstantRange handling. const APInt *C; - if (!match(RHS, m_APInt(C))) + auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits()); + if (match(RHS, m_APInt(C))) { + ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C); + return !TrueValues.contains(Zero); + } + + auto *VC = dyn_cast(RHS); + if (VC == nullptr) return false; - ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C); - return !TrueValues.contains(APInt::getZero(C->getBitWidth())); + for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem; + ++ElemIdx) { + ConstantRange TrueValues = ConstantRange::makeExactICmpRegion( + Pred, VC->getElementAsAPInt(ElemIdx)); + if (TrueValues.contains(Zero)) + return false; + } + return true; } static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) { diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll index f64303f173015..dbec47ea0ae26 100644 --- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll +++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll @@ -1163,11 +1163,7 @@ define i1 @sdiv_known_non_zero_fail(i8 %x, i8 %y) { define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @cmp_excludes_zero_with_nonsplat_vec( -; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[A]], <2 x i8> -; CHECK-NEXT: [[AND:%.*]] 
= or <2 x i8> [[S]], [[B:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[AND]], zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %c = icmp sge <2 x i8> %a, %s = select <2 x i1> %c, <2 x i8> %a, <2 x i8> From 444383e0d07cd5de3d60b25cf849fd0b68b6e974 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Thu, 28 Sep 2023 19:39:09 -0500 Subject: [PATCH 024/720] [ValueTracking] Do more thorough non-zero check in `isKnownToBePowerOfTwo` when `OrZero` is no set. We can cover more cases by directly checking if the result is known-nonzero for common patterns when they are missing `OrZero`. This patch add `isKnownNonZero` checks for `shl`, `lshr`, `and`, and `mul`. Differential Revision: https://reviews.llvm.org/D157309 --- llvm/lib/Analysis/ValueTracking.cpp | 25 +++++++++---------- .../ValueTracking/known-power-of-two.ll | 6 ++--- .../Transforms/InstSimplify/ctpop-pow2.ll | 5 +--- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 2b0bbe6f1f434..82310444326d6 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2061,20 +2061,19 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); return false; case Instruction::Mul: - return OrZero && - isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && - isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); + return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && + isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) && + (OrZero || isKnownNonZero(I, Depth, Q)); case Instruction::And: - if (OrZero) { - // A power of two and'd with anything is a power of two or zero. 
- if (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) || - isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q)) - return true; - // X & (-X) is always a power of two or zero. - if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) || - match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0))))) - return true; - } + // A power of two and'd with anything is a power of two or zero. + if (OrZero && + (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) || + isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q))) + return true; + // X & (-X) is always a power of two or zero. + if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) || + match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0))))) + return OrZero || isKnownNonZero(I->getOperand(0), Depth, Q); return false; case Instruction::Add: { // Adding a power-of-two or zero to the same power-of-two or zero yields diff --git a/llvm/test/Analysis/ValueTracking/known-power-of-two.ll b/llvm/test/Analysis/ValueTracking/known-power-of-two.ll index b86cf59fa2046..12fefda31aae3 100644 --- a/llvm/test/Analysis/ValueTracking/known-power-of-two.ll +++ b/llvm/test/Analysis/ValueTracking/known-power-of-two.ll @@ -584,9 +584,9 @@ define i1 @and_is_pow2(i16 %x, i16 %y) { ; CHECK-SAME: (i16 [[X:%.*]], i16 [[Y:%.*]]) { ; CHECK-NEXT: [[XNZ:%.*]] = or i16 [[X]], 4 ; CHECK-NEXT: [[X_NEG:%.*]] = sub nsw i16 0, [[XNZ]] -; CHECK-NEXT: [[XX:%.*]] = and i16 [[XNZ]], [[X_NEG]] -; CHECK-NEXT: [[AND:%.*]] = and i16 [[XX]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[AND]], [[XX]] +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X_NEG]], [[Y]] +; CHECK-NEXT: [[AND:%.*]] = and i16 [[TMP1]], [[XNZ]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[AND]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %xnz = or i16 %x, 4 diff --git a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll index eae368f03ca7e..48cc8895aebbc 100644 --- 
a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll +++ b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll @@ -41,10 +41,7 @@ define i16 @ctpop_x_and_negx(i16 %x) { define i8 @ctpop_x_nz_and_negx(i8 %x) { ; CHECK-LABEL: @ctpop_x_nz_and_negx( -; CHECK-NEXT: [[X1:%.*]] = or i8 [[X:%.*]], 1 -; CHECK-NEXT: [[V0:%.*]] = sub i8 0, [[X1]] -; CHECK-NEXT: [[V1:%.*]] = and i8 [[X1]], [[V0]] -; CHECK-NEXT: ret i8 [[V1]] +; CHECK-NEXT: ret i8 1 ; %x1 = or i8 %x, 1 %v0 = sub i8 0, %x1 From 968468af9c6808fa76304deb226f13ef85403e4a Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Thu, 28 Sep 2023 19:39:13 -0500 Subject: [PATCH 025/720] [InstCombine] Tests for adding flags to shifts; NFC Differential Revision: https://reviews.llvm.org/D157531 --- .../Transforms/InstCombine/shift-flags.ll | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/shift-flags.ll diff --git a/llvm/test/Transforms/InstCombine/shift-flags.ll b/llvm/test/Transforms/InstCombine/shift-flags.ll new file mode 100644 index 0000000000000..ca1c65307559a --- /dev/null +++ b/llvm/test/Transforms/InstCombine/shift-flags.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i8 @shl_add_nuw(i8 %amt_in, i8 %cnt_in) { +; CHECK-LABEL: @shl_add_nuw( +; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], 63 +; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = shl i8 [[AMT]], [[CNT]] +; CHECK-NEXT: ret i8 [[R]] +; + %amt = and i8 %amt_in, 63 + %cnt = and i8 %cnt_in, 2 + %r = shl i8 %amt, %cnt + ret i8 %r +} + +define i8 @shl_add_nuw_fail(i8 %amt_in, i8 %cnt_in) { +; CHECK-LABEL: @shl_add_nuw_fail( +; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], 63 +; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 3 +; CHECK-NEXT: [[R:%.*]] = shl i8 [[AMT]], [[CNT]] +; CHECK-NEXT: ret i8 [[R]] +; + %amt = and i8 %amt_in, 63 + %cnt = and i8 %cnt_in, 3 + 
%r = shl i8 %amt, %cnt + ret i8 %r +} + +define i8 @shl_add_nuw_and_nsw(i8 %amt_in, i8 %cnt_in) { +; CHECK-LABEL: @shl_add_nuw_and_nsw( +; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], 31 +; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = shl i8 [[AMT]], [[CNT]] +; CHECK-NEXT: ret i8 [[R]] +; + %amt = and i8 %amt_in, 31 + %cnt = and i8 %cnt_in, 2 + %r = shl i8 %amt, %cnt + ret i8 %r +} + +define i8 @shl_add_nsw(i8 %amt_in, i8 %cnt_in) { +; CHECK-LABEL: @shl_add_nsw( +; CHECK-NEXT: [[AMT:%.*]] = or i8 [[AMT_IN:%.*]], -32 +; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = shl i8 [[AMT]], [[CNT]] +; CHECK-NEXT: ret i8 [[R]] +; + %amt = or i8 %amt_in, 224 + %cnt = and i8 %cnt_in, 2 + %r = shl i8 %amt, %cnt + ret i8 %r +} + +define i8 @shl_add_nsw_fail(i8 %amt_in, i8 %cnt_in) { +; CHECK-LABEL: @shl_add_nsw_fail( +; CHECK-NEXT: [[AMT:%.*]] = or i8 [[AMT_IN:%.*]], -64 +; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = shl i8 [[AMT]], [[CNT]] +; CHECK-NEXT: ret i8 [[R]] +; + %amt = or i8 %amt_in, 192 + %cnt = and i8 %cnt_in, 2 + %r = shl i8 %amt, %cnt + ret i8 %r +} + +define i8 @lshr_add_exact(i8 %amt_in, i8 %cnt_in) { +; CHECK-LABEL: @lshr_add_exact( +; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], -4 +; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[AMT]], [[CNT]] +; CHECK-NEXT: ret i8 [[R]] +; + %amt = and i8 %amt_in, -4 + %cnt = and i8 %cnt_in, 2 + %r = lshr i8 %amt, %cnt + ret i8 %r +} + +define i8 @lshr_add_exact_fail(i8 %amt_in, i8 %cnt_in) { +; CHECK-LABEL: @lshr_add_exact_fail( +; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], -7 +; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[AMT]], [[CNT]] +; CHECK-NEXT: ret i8 [[R]] +; + %amt = and i8 %amt_in, -7 + %cnt = and i8 %cnt_in, 2 + %r = lshr i8 %amt, %cnt + ret i8 %r +} + +define i8 @ashr_add_exact(i8 %amt_in, i8 %cnt_in) { +; 
CHECK-LABEL: @ashr_add_exact( +; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], -14 +; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = ashr i8 [[AMT]], [[CNT]] +; CHECK-NEXT: ret i8 [[R]] +; + %amt = and i8 %amt_in, -14 + %cnt = and i8 %cnt_in, 1 + %r = ashr i8 %amt, %cnt + ret i8 %r +} + +define i8 @ashr_add_exact_fail(i8 %amt_in, i8 %cnt_in) { +; CHECK-LABEL: @ashr_add_exact_fail( +; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], -14 +; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = ashr i8 [[AMT]], [[CNT]] +; CHECK-NEXT: ret i8 [[R]] +; + %amt = and i8 %amt_in, -14 + %cnt = and i8 %cnt_in, 2 + %r = ashr i8 %amt, %cnt + ret i8 %r +} From 2dd52b4527667837cc525aa48435ab5cbfa30a0b Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Thu, 28 Sep 2023 19:39:15 -0500 Subject: [PATCH 026/720] [InstCombine] Improve logic for adding flags to shift instructions. Instead of relying on constant operands, use known bits to do the computation. 
Proofs: https://alive2.llvm.org/ce/z/M-aBnw Differential Revision: https://reviews.llvm.org/D157532 --- .../InstCombine/InstCombineShifts.cpp | 93 +++++++++++++------ .../ValueTracking/known-power-of-two.ll | 30 +++--- .../Transforms/InstCombine/and-add-shl.ll | 2 +- ...undant-left-shift-input-masking-pr49778.ll | 4 +- llvm/test/Transforms/InstCombine/rotate.ll | 2 +- .../Transforms/InstCombine/shift-flags.ll | 10 +- .../InstCombine/trunc-inseltpoison.ll | 6 +- llvm/test/Transforms/InstCombine/trunc.ll | 6 +- 8 files changed, 95 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 83defd5816f59..e178f9536b69f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -941,6 +941,60 @@ Instruction *InstCombinerImpl::foldLShrOverflowBit(BinaryOperator &I) { return new ZExtInst(Overflow, Ty); } +// Try to set nuw/nsw flags on shl or exact flag on lshr/ashr using knownbits. +static bool setShiftFlags(BinaryOperator &I, const SimplifyQuery &Q) { + assert(I.isShift() && "Expected a shift as input"); + // We already have all the flags. + if (I.getOpcode() == Instruction::Shl) { + if (I.hasNoUnsignedWrap() && I.hasNoSignedWrap()) + return false; + } else { + if (I.isExact()) + return false; + } + + // Compute what we know about shift count. + KnownBits KnownCnt = + computeKnownBits(I.getOperand(1), Q.DL, /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT); + // If we know nothing about shift count or its a poison shift, we won't be + // able to prove anything so return before computing shift amount. 
+ if (KnownCnt.isUnknown()) + return false; + unsigned BitWidth = KnownCnt.getBitWidth(); + APInt MaxCnt = KnownCnt.getMaxValue(); + if (MaxCnt.uge(BitWidth)) + return false; + + KnownBits KnownAmt = + computeKnownBits(I.getOperand(0), Q.DL, /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT); + bool Changed = false; + + if (I.getOpcode() == Instruction::Shl) { + // If we have as many leading zeros than maximum shift cnt we have nuw. + if (!I.hasNoUnsignedWrap() && MaxCnt.ule(KnownAmt.countMinLeadingZeros())) { + I.setHasNoUnsignedWrap(); + Changed = true; + } + // If we have more sign bits than maximum shift cnt we have nsw. + if (!I.hasNoSignedWrap()) { + if (MaxCnt.ult(KnownAmt.countMinSignBits()) || + MaxCnt.ult(ComputeNumSignBits(I.getOperand(0), Q.DL, /*Depth*/ 0, + Q.AC, Q.CxtI, Q.DT))) { + I.setHasNoSignedWrap(); + Changed = true; + } + } + return Changed; + } + + // If we have at least as many trailing zeros as maximum count then we have + // exact. + Changed = MaxCnt.ule(KnownAmt.countMinTrailingZeros()); + I.setIsExact(Changed); + + return Changed; +} + Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { const SimplifyQuery Q = SQ.getWithInstruction(&I); @@ -1121,22 +1175,11 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { Value *NewShift = Builder.CreateShl(X, Op1); return BinaryOperator::CreateSub(NewLHS, NewShift); } - - // If the shifted-out value is known-zero, then this is a NUW shift. - if (!I.hasNoUnsignedWrap() && - MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, ShAmtC), 0, - &I)) { - I.setHasNoUnsignedWrap(); - return &I; - } - - // If the shifted-out value is all signbits, then this is a NSW shift. - if (!I.hasNoSignedWrap() && ComputeNumSignBits(Op0, 0, &I) > ShAmtC) { - I.setHasNoSignedWrap(); - return &I; - } } + if (setShiftFlags(I, Q)) + return &I; + // Transform (x >> y) << y to x & (-1 << y) // Valid for any type of right-shift. 
Value *X; @@ -1427,15 +1470,12 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { Value *And = Builder.CreateAnd(BoolX, BoolY); return new ZExtInst(And, Ty); } - - // If the shifted-out value is known-zero, then this is an exact shift. - if (!I.isExact() && - MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmtC), 0, &I)) { - I.setIsExact(); - return &I; - } } + const SimplifyQuery Q = SQ.getWithInstruction(&I); + if (setShiftFlags(I, Q)) + return &I; + // Transform (x << y) >> y to x & (-1 >> y) if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))))) { Constant *AllOnes = ConstantInt::getAllOnesValue(Ty); @@ -1594,15 +1634,12 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) { if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y))))) return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty); } - - // If the shifted-out value is known-zero, then this is an exact shift. - if (!I.isExact() && - MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) { - I.setIsExact(); - return &I; - } } + const SimplifyQuery Q = SQ.getWithInstruction(&I); + if (setShiftFlags(I, Q)) + return &I; + // Prefer `-(x & 1)` over `(x << (bitwidth(x)-1)) a>> (bitwidth(x)-1)` // as the pattern to splat the lowest bit. // FIXME: iff X is already masked, we don't need the one-use check. 
diff --git a/llvm/test/Analysis/ValueTracking/known-power-of-two.ll b/llvm/test/Analysis/ValueTracking/known-power-of-two.ll index 12fefda31aae3..7bcf96065a69d 100644 --- a/llvm/test/Analysis/ValueTracking/known-power-of-two.ll +++ b/llvm/test/Analysis/ValueTracking/known-power-of-two.ll @@ -413,11 +413,11 @@ define i1 @mul_is_pow2(i16 %x, i16 %y, i16 %z) { ; CHECK-SAME: (i16 [[X:%.*]], i16 [[Y:%.*]], i16 [[Z:%.*]]) { ; CHECK-NEXT: [[XSMALL:%.*]] = and i16 [[X]], 3 ; CHECK-NEXT: [[ZSMALL:%.*]] = and i16 [[Z]], 3 -; CHECK-NEXT: [[XP2:%.*]] = shl i16 4, [[XSMALL]] -; CHECK-NEXT: [[ZP2:%.*]] = shl i16 2, [[ZSMALL]] -; CHECK-NEXT: [[XX:%.*]] = mul nuw nsw i16 [[XP2]], [[ZP2]] +; CHECK-NEXT: [[ZP2:%.*]] = shl nuw nsw i16 2, [[ZSMALL]] +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i16 [[XSMALL]], 2 +; CHECK-NEXT: [[XX:%.*]] = shl nuw nsw i16 [[ZP2]], [[TMP1]] ; CHECK-NEXT: [[AND:%.*]] = and i16 [[XX]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[AND]], [[XX]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[AND]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %xsmall = and i16 %x, 3 @@ -436,9 +436,9 @@ define i1 @mul_is_pow2_fail(i16 %x, i16 %y, i16 %z) { ; CHECK-SAME: (i16 [[X:%.*]], i16 [[Y:%.*]], i16 [[Z:%.*]]) { ; CHECK-NEXT: [[XSMALL:%.*]] = and i16 [[X]], 7 ; CHECK-NEXT: [[ZSMALL:%.*]] = and i16 [[Z]], 7 -; CHECK-NEXT: [[XP2:%.*]] = shl i16 4, [[XSMALL]] -; CHECK-NEXT: [[ZP2:%.*]] = shl i16 2, [[ZSMALL]] -; CHECK-NEXT: [[XX:%.*]] = mul i16 [[XP2]], [[ZP2]] +; CHECK-NEXT: [[ZP2:%.*]] = shl nuw nsw i16 2, [[ZSMALL]] +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i16 [[XSMALL]], 2 +; CHECK-NEXT: [[XX:%.*]] = shl i16 [[ZP2]], [[TMP1]] ; CHECK-NEXT: [[AND:%.*]] = and i16 [[XX]], [[Y]] ; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[AND]], [[XX]] ; CHECK-NEXT: ret i1 [[R]] @@ -459,9 +459,9 @@ define i1 @mul_is_pow2_fail2(i16 %x, i16 %y, i16 %z) { ; CHECK-SAME: (i16 [[X:%.*]], i16 [[Y:%.*]], i16 [[Z:%.*]]) { ; CHECK-NEXT: [[XSMALL:%.*]] = and i16 [[X]], 3 ; CHECK-NEXT: [[ZSMALL:%.*]] = and i16 [[Z]], 3 -; 
CHECK-NEXT: [[XP2:%.*]] = shl i16 3, [[XSMALL]] -; CHECK-NEXT: [[ZP2:%.*]] = shl i16 2, [[ZSMALL]] -; CHECK-NEXT: [[XX:%.*]] = mul nuw nsw i16 [[XP2]], [[ZP2]] +; CHECK-NEXT: [[XP2:%.*]] = shl nuw nsw i16 3, [[XSMALL]] +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i16 [[ZSMALL]], 1 +; CHECK-NEXT: [[XX:%.*]] = shl nuw nsw i16 [[XP2]], [[TMP1]] ; CHECK-NEXT: [[AND:%.*]] = and i16 [[XX]], [[Y]] ; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[AND]], [[XX]] ; CHECK-NEXT: ret i1 [[R]] @@ -481,9 +481,9 @@ define i1 @shl_is_pow2(i16 %x, i16 %y) { ; CHECK-LABEL: define i1 @shl_is_pow2 ; CHECK-SAME: (i16 [[X:%.*]], i16 [[Y:%.*]]) { ; CHECK-NEXT: [[XSMALL:%.*]] = and i16 [[X]], 7 -; CHECK-NEXT: [[XX:%.*]] = shl i16 4, [[XSMALL]] +; CHECK-NEXT: [[XX:%.*]] = shl nuw nsw i16 4, [[XSMALL]] ; CHECK-NEXT: [[AND:%.*]] = and i16 [[XX]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[AND]], [[XX]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[AND]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %xsmall = and i16 %x, 7 @@ -515,7 +515,7 @@ define i1 @shl_is_pow2_fail2(i16 %x, i16 %y) { ; CHECK-LABEL: define i1 @shl_is_pow2_fail2 ; CHECK-SAME: (i16 [[X:%.*]], i16 [[Y:%.*]]) { ; CHECK-NEXT: [[XSMALL:%.*]] = and i16 [[X]], 7 -; CHECK-NEXT: [[XX:%.*]] = shl i16 5, [[XSMALL]] +; CHECK-NEXT: [[XX:%.*]] = shl nuw nsw i16 5, [[XSMALL]] ; CHECK-NEXT: [[AND:%.*]] = and i16 [[XX]], [[Y]] ; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[AND]], [[XX]] ; CHECK-NEXT: ret i1 [[R]] @@ -532,9 +532,9 @@ define i1 @lshr_is_pow2(i16 %x, i16 %y) { ; CHECK-LABEL: define i1 @lshr_is_pow2 ; CHECK-SAME: (i16 [[X:%.*]], i16 [[Y:%.*]]) { ; CHECK-NEXT: [[XSMALL:%.*]] = and i16 [[X]], 7 -; CHECK-NEXT: [[XX:%.*]] = lshr i16 512, [[XSMALL]] +; CHECK-NEXT: [[XX:%.*]] = lshr exact i16 512, [[XSMALL]] ; CHECK-NEXT: [[AND:%.*]] = and i16 [[XX]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[AND]], [[XX]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[AND]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %xsmall = and i16 %x, 7 diff --git 
a/llvm/test/Transforms/InstCombine/and-add-shl.ll b/llvm/test/Transforms/InstCombine/and-add-shl.ll index 28778f34137e0..92b3a8144d62c 100644 --- a/llvm/test/Transforms/InstCombine/and-add-shl.ll +++ b/llvm/test/Transforms/InstCombine/and-add-shl.ll @@ -29,7 +29,7 @@ define i8 @and_not_shl(i8 %x) { ; CHECK-SAME: (i8 [[X:%.*]]) { ; CHECK-NEXT: [[OP1_P2:%.*]] = icmp ult i8 [[X]], 6 ; CHECK-NEXT: call void @llvm.assume(i1 [[OP1_P2]]) -; CHECK-NEXT: [[SHIFT:%.*]] = shl i8 -1, [[X]] +; CHECK-NEXT: [[SHIFT:%.*]] = shl nsw i8 -1, [[X]] ; CHECK-NEXT: [[NOT:%.*]] = and i8 [[SHIFT]], 32 ; CHECK-NEXT: [[R:%.*]] = xor i8 [[NOT]], 32 ; CHECK-NEXT: ret i8 [[R]] diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll index 96dc6c68f4d4f..b06a90e2cd99b 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll @@ -5,10 +5,10 @@ define i32 @src(i1 %x2) { ; CHECK-LABEL: @src( ; CHECK-NEXT: [[X13:%.*]] = zext i1 [[X2:%.*]] to i32 -; CHECK-NEXT: [[_7:%.*]] = shl i32 -1, [[X13]] +; CHECK-NEXT: [[_7:%.*]] = shl nsw i32 -1, [[X13]] ; CHECK-NEXT: [[MASK:%.*]] = xor i32 [[_7]], -1 ; CHECK-NEXT: [[_8:%.*]] = and i32 [[MASK]], [[X13]] -; CHECK-NEXT: [[_9:%.*]] = shl i32 [[_8]], [[X13]] +; CHECK-NEXT: [[_9:%.*]] = shl nuw nsw i32 [[_8]], [[X13]] ; CHECK-NEXT: ret i32 [[_9]] ; %x13 = zext i1 %x2 to i32 diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll index fece47534819e..ed5145255b2f0 100644 --- a/llvm/test/Transforms/InstCombine/rotate.ll +++ b/llvm/test/Transforms/InstCombine/rotate.ll @@ -705,7 +705,7 @@ define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) { ; CHECK-NEXT: [[LSHAMT:%.*]] = and i33 [[SHAMT]], 8 ; CHECK-NEXT: [[RSHAMT:%.*]] = and i33 [[NEG]], 8 ; CHECK-NEXT: [[CONV:%.*]] = 
zext i9 [[V:%.*]] to i33 -; CHECK-NEXT: [[SHL:%.*]] = shl i33 [[CONV]], [[LSHAMT]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw i33 [[CONV]], [[LSHAMT]] ; CHECK-NEXT: [[SHR:%.*]] = lshr i33 [[CONV]], [[RSHAMT]] ; CHECK-NEXT: [[OR:%.*]] = or i33 [[SHL]], [[SHR]] ; CHECK-NEXT: [[RET:%.*]] = trunc i33 [[OR]] to i9 diff --git a/llvm/test/Transforms/InstCombine/shift-flags.ll b/llvm/test/Transforms/InstCombine/shift-flags.ll index ca1c65307559a..08cf4821d85b4 100644 --- a/llvm/test/Transforms/InstCombine/shift-flags.ll +++ b/llvm/test/Transforms/InstCombine/shift-flags.ll @@ -5,7 +5,7 @@ define i8 @shl_add_nuw(i8 %amt_in, i8 %cnt_in) { ; CHECK-LABEL: @shl_add_nuw( ; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], 63 ; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 -; CHECK-NEXT: [[R:%.*]] = shl i8 [[AMT]], [[CNT]] +; CHECK-NEXT: [[R:%.*]] = shl nuw i8 [[AMT]], [[CNT]] ; CHECK-NEXT: ret i8 [[R]] ; %amt = and i8 %amt_in, 63 @@ -31,7 +31,7 @@ define i8 @shl_add_nuw_and_nsw(i8 %amt_in, i8 %cnt_in) { ; CHECK-LABEL: @shl_add_nuw_and_nsw( ; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], 31 ; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 -; CHECK-NEXT: [[R:%.*]] = shl i8 [[AMT]], [[CNT]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw i8 [[AMT]], [[CNT]] ; CHECK-NEXT: ret i8 [[R]] ; %amt = and i8 %amt_in, 31 @@ -44,7 +44,7 @@ define i8 @shl_add_nsw(i8 %amt_in, i8 %cnt_in) { ; CHECK-LABEL: @shl_add_nsw( ; CHECK-NEXT: [[AMT:%.*]] = or i8 [[AMT_IN:%.*]], -32 ; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 -; CHECK-NEXT: [[R:%.*]] = shl i8 [[AMT]], [[CNT]] +; CHECK-NEXT: [[R:%.*]] = shl nsw i8 [[AMT]], [[CNT]] ; CHECK-NEXT: ret i8 [[R]] ; %amt = or i8 %amt_in, 224 @@ -70,7 +70,7 @@ define i8 @lshr_add_exact(i8 %amt_in, i8 %cnt_in) { ; CHECK-LABEL: @lshr_add_exact( ; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], -4 ; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 2 -; CHECK-NEXT: [[R:%.*]] = lshr i8 [[AMT]], [[CNT]] +; CHECK-NEXT: [[R:%.*]] = lshr exact i8 [[AMT]], [[CNT]] 
; CHECK-NEXT: ret i8 [[R]] ; %amt = and i8 %amt_in, -4 @@ -96,7 +96,7 @@ define i8 @ashr_add_exact(i8 %amt_in, i8 %cnt_in) { ; CHECK-LABEL: @ashr_add_exact( ; CHECK-NEXT: [[AMT:%.*]] = and i8 [[AMT_IN:%.*]], -14 ; CHECK-NEXT: [[CNT:%.*]] = and i8 [[CNT_IN:%.*]], 1 -; CHECK-NEXT: [[R:%.*]] = ashr i8 [[AMT]], [[CNT]] +; CHECK-NEXT: [[R:%.*]] = ashr exact i8 [[AMT]], [[CNT]] ; CHECK-NEXT: ret i8 [[R]] ; %amt = and i8 %amt_in, -14 diff --git a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll index ac0115a0f5715..b5dcb9b67d676 100644 --- a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll @@ -345,7 +345,7 @@ define i64 @test11(i32 %A, i32 %B) { ; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 31 ; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[F:%.*]] = shl i64 [[C]], [[E]] +; CHECK-NEXT: [[F:%.*]] = shl nuw nsw i64 [[C]], [[E]] ; CHECK-NEXT: ret i64 [[F]] ; %C = zext i32 %A to i128 @@ -361,7 +361,7 @@ define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], ; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[F:%.*]] = shl <2 x i64> [[C]], [[E]] +; CHECK-NEXT: [[F:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = zext <2 x i32> %A to <2 x i128> @@ -377,7 +377,7 @@ define <2 x i64> @test11_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], ; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[F:%.*]] = shl <2 x i64> [[C]], [[E]] +; CHECK-NEXT: [[F:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = zext <2 x i32> %A to <2 x i128> 
diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll index e04bcaf073b64..33baee858493a 100644 --- a/llvm/test/Transforms/InstCombine/trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc.ll @@ -345,7 +345,7 @@ define i64 @test11(i32 %A, i32 %B) { ; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 31 ; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[F:%.*]] = shl i64 [[C]], [[E]] +; CHECK-NEXT: [[F:%.*]] = shl nuw nsw i64 [[C]], [[E]] ; CHECK-NEXT: ret i64 [[F]] ; %C = zext i32 %A to i128 @@ -361,7 +361,7 @@ define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], ; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[F:%.*]] = shl <2 x i64> [[C]], [[E]] +; CHECK-NEXT: [[F:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = zext <2 x i32> %A to <2 x i128> @@ -377,7 +377,7 @@ define <2 x i64> @test11_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], ; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[F:%.*]] = shl <2 x i64> [[C]], [[E]] +; CHECK-NEXT: [[F:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = zext <2 x i32> %A to <2 x i128> From 403e0e8cd95f21d5f94f1e0663c2cfe48e54bf08 Mon Sep 17 00:00:00 2001 From: isuckatcs <65320245+isuckatcs@users.noreply.github.com> Date: Thu, 12 Oct 2023 23:26:44 +0200 Subject: [PATCH 027/720] [clang][Interp] Fix crash during `InterpStack` printing (#68246) `InterpStack` is using an `std::vector<>` to track the `ItemTypes`. 
As a result, the new types are inserted to the back of the `std::vector<>`, however `dump()` was reading the types from the front (the bottom of the stack) and printing the value on the top of the stack. This lead to a crash if the type on the bottom had a different type from the type on the top. E.g.: ``` Items: 2. Size: 40 0/8: 0 1/40: 0x5590cddc0460 {16, 16, 32} ``` The same method also miscalculated the offsets during printing the stack, which was a source of incorrect stack dumps and future crashes. This patch changes the order of iteration of the types and fixes the offset calculation. As for testing the change, the issue is that it needs to be done as a unittest, however from `clang/unittests` we don't have access to `clang/lib`, where `Interp` resides. Although the previous implementation didn't have unittests either, so I'm not sure if we actually care that much or not. --- clang/lib/AST/Interp/InterpStack.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/clang/lib/AST/Interp/InterpStack.cpp b/clang/lib/AST/Interp/InterpStack.cpp index 18a34079c3b16..91fe40feb7671 100644 --- a/clang/lib/AST/Interp/InterpStack.cpp +++ b/clang/lib/AST/Interp/InterpStack.cpp @@ -86,20 +86,25 @@ void InterpStack::shrink(size_t Size) { void InterpStack::dump() const { #ifndef NDEBUG - llvm::errs() << "Items: " << ItemTypes.size() << ". Size: " << size() << "\n"; + llvm::errs() << "Items: " << ItemTypes.size() << ". Size: " << size() << '\n'; if (ItemTypes.empty()) return; size_t Index = 0; - size_t Offset = align(primSize(ItemTypes[0])); - for (PrimType Ty : ItemTypes) { - llvm::errs() << Index << "/" << Offset << ": "; - TYPE_SWITCH(Ty, { + size_t Offset = 0; + + // The type of the item on the top of the stack is inserted to the back + // of the vector, so the iteration has to happen backwards. 
+ for (auto TyIt = ItemTypes.rbegin(); TyIt != ItemTypes.rend(); ++TyIt) { + Offset += align(primSize(*TyIt)); + + llvm::errs() << Index << '/' << Offset << ": "; + TYPE_SWITCH(*TyIt, { const T &V = peek(Offset); llvm::errs() << V; }); - llvm::errs() << "\n"; - Offset += align(primSize(Ty)); + llvm::errs() << '\n'; + ++Index; } #endif From b3a39a9bdb6b3300c872e0229fadbaac430649c1 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 12 Oct 2023 14:30:33 -0700 Subject: [PATCH 028/720] [libc++] Check formatting with clang-format 17 (#68928) This updates the clang-format we use in libc++ to 17. This is necessary to start running the generated-files checks in GitHub Actions (in #68920). In fact this is a pre-existing issue regardless of #68920 -- right now our ignore_format.txt job disagrees with the LLVM-wide clang-format job. --- libcxx/include/__concepts/swappable.h | 2 +- libcxx/include/__ranges/to.h | 6 +- libcxx/include/__type_traits/promote.h | 2 +- .../implicit_deduction_guides.pass.cpp | 267 +++++++++--------- .../implicit_deduction_guides.pass.cpp | 7 +- libcxx/test/support/counting_projection.h | 2 +- libcxx/utils/ci/buildkite-pipeline.yml | 2 +- libcxx/utils/data/ignore_format.txt | 6 - 8 files changed, 145 insertions(+), 149 deletions(-) diff --git a/libcxx/include/__concepts/swappable.h b/libcxx/include/__concepts/swappable.h index c1969de34510a..cdffe34205557 100644 --- a/libcxx/include/__concepts/swappable.h +++ b/libcxx/include/__concepts/swappable.h @@ -92,7 +92,7 @@ struct __fn { // 2.3 Otherwise, if `E1` and `E2` are lvalues of the same type `T` that models... 
template <__exchangeable _Tp> _LIBCPP_HIDE_FROM_ABI constexpr void operator()(_Tp& __x, _Tp& __y) const - noexcept(is_nothrow_move_constructible_v<_Tp>&& is_nothrow_move_assignable_v<_Tp>) { + noexcept(is_nothrow_move_constructible_v<_Tp> && is_nothrow_move_assignable_v<_Tp>) { __y = _VSTD::exchange(__x, _VSTD::move(__y)); } }; diff --git a/libcxx/include/__ranges/to.h b/libcxx/include/__ranges/to.h index a519662e759e1..cf162100ee46b 100644 --- a/libcxx/include/__ranges/to.h +++ b/libcxx/include/__ranges/to.h @@ -207,13 +207,11 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr auto to(_Args&&... __args) static_assert( !is_volatile_v<_Container>, "The target container cannot be volatile-qualified, please remove the volatile"); - auto __to_func = [](_Range && __range, _Tail && ... __tail) + auto __to_func = [](_Range&& __range, _Tail&&... __tail) requires requires { // /**/ ranges::to<_Container>(std::forward<_Range>(__range), std::forward<_Tail>(__tail)...); } - { - return ranges::to<_Container>(std::forward<_Range>(__range), std::forward<_Tail>(__tail)...); - }; + { return ranges::to<_Container>(std::forward<_Range>(__range), std::forward<_Tail>(__tail)...); }; return __range_adaptor_closure_t(std::__bind_back(__to_func, std::forward<_Args>(__args)...)); } diff --git a/libcxx/include/__type_traits/promote.h b/libcxx/include/__type_traits/promote.h index 018bd69df2604..e22b4a422c2c8 100644 --- a/libcxx/include/__type_traits/promote.h +++ b/libcxx/include/__type_traits/promote.h @@ -50,7 +50,7 @@ struct __numeric_type { template ::value&& __numeric_type<_A2>::value&& __numeric_type<_A3>::value> + bool = __numeric_type<_A1>::value && __numeric_type<_A2>::value && __numeric_type<_A3>::value> class __promote_imp { public: static const bool value = false; diff --git a/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp b/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp index 
e0fa234639bd0..f9bd18a663d7d 100644 --- a/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp @@ -48,15 +48,17 @@ using BStr = std::basic_string, Alloc>; TEST_CONSTEXPR_CXX20 bool test() { using TestSizeT = test_allocator::size_type; { - // Testing (1) - // Nothing to do. Cannot deduce without any arguments. - } { - // Testing (2) - // This overload isn't compatible with implicit deduction guides as - // specified in the standard. - // const test_allocator alloc{}; - // std::basic_string s(alloc); - } { // Testing (3) w/o allocator + // Testing (1) + // Nothing to do. Cannot deduce without any arguments. + } + { + // Testing (2) + // This overload isn't compatible with implicit deduction guides as + // specified in the standard. + // const test_allocator alloc{}; + // std::basic_string s(alloc); + } + { // Testing (3) w/o allocator std::basic_string s(6ull, 'a'); ASSERT_SAME_TYPE(decltype(s), std::string); assert(s == "aaaaaa"); @@ -187,162 +189,163 @@ TEST_CONSTEXPR_CXX20 bool test() { ASSERT_SAME_TYPE(decltype(s), std::string); assert(s == "abc"); } - {// (8) w/ allocator - {using Expect = std::basic_string, test_allocator>; - using It = cpp17_input_iterator; - const char* input = "abcdef"; - std::basic_string s(It(input), It(input + 3), test_allocator{}); - ASSERT_SAME_TYPE(decltype(s), Expect); - assert(s == "abc"); -} + { // (8) w/ allocator + { + using Expect = std::basic_string, test_allocator>; + using It = cpp17_input_iterator; + const char* input = "abcdef"; + std::basic_string s(It(input), It(input + 3), test_allocator{}); + ASSERT_SAME_TYPE(decltype(s), Expect); + assert(s == "abc"); + } #ifndef TEST_HAS_NO_WIDE_CHARACTERS -{ - using ExpectW = std::basic_string, test_allocator>; - using It = cpp17_input_iterator; - const wchar_t* input = L"abcdef"; - std::basic_string s(It(input), It(input + 3), test_allocator{}); - 
ASSERT_SAME_TYPE(decltype(s), ExpectW); - assert(s == L"abc"); -} + { + using ExpectW = std::basic_string, test_allocator>; + using It = cpp17_input_iterator; + const wchar_t* input = L"abcdef"; + std::basic_string s(It(input), It(input + 3), test_allocator{}); + ASSERT_SAME_TYPE(decltype(s), ExpectW); + assert(s == L"abc"); + } #endif -} -{ // Testing (9) - const std::string sin("abc"); - std::basic_string s(sin); - ASSERT_SAME_TYPE(decltype(s), std::string); - assert(s == "abc"); + } + { // Testing (9) + const std::string sin("abc"); + std::basic_string s(sin); + ASSERT_SAME_TYPE(decltype(s), std::string); + assert(s == "abc"); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - using WStr = std::basic_string, test_allocator>; - const WStr win(L"abcdef"); - std::basic_string w(win); - ASSERT_SAME_TYPE(decltype(w), WStr); - assert(w == L"abcdef"); + using WStr = std::basic_string, test_allocator>; + const WStr win(L"abcdef"); + std::basic_string w(win); + ASSERT_SAME_TYPE(decltype(w), WStr); + assert(w == L"abcdef"); #endif -} -{ // Testing (10) - const std::string sin("abc"); - std::basic_string s(sin, std::allocator{}); - ASSERT_SAME_TYPE(decltype(s), std::string); - assert(s == "abc"); + } + { // Testing (10) + const std::string sin("abc"); + std::basic_string s(sin, std::allocator{}); + ASSERT_SAME_TYPE(decltype(s), std::string); + assert(s == "abc"); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - using WStr = std::basic_string, test_allocator>; - const WStr win(L"abcdef"); - std::basic_string w(win, test_allocator{}); - ASSERT_SAME_TYPE(decltype(w), WStr); - assert(w == L"abcdef"); + using WStr = std::basic_string, test_allocator>; + const WStr win(L"abcdef"); + std::basic_string w(win, test_allocator{}); + ASSERT_SAME_TYPE(decltype(w), WStr); + assert(w == L"abcdef"); #endif -} -{ // Testing (11) - std::string sin("abc"); - std::basic_string s(std::move(sin)); - ASSERT_SAME_TYPE(decltype(s), std::string); - assert(s == "abc"); + } + { // Testing (11) + std::string sin("abc"); + 
std::basic_string s(std::move(sin)); + ASSERT_SAME_TYPE(decltype(s), std::string); + assert(s == "abc"); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - using WStr = std::basic_string, test_allocator>; - WStr win(L"abcdef"); - std::basic_string w(std::move(win)); - ASSERT_SAME_TYPE(decltype(w), WStr); - assert(w == L"abcdef"); + using WStr = std::basic_string, test_allocator>; + WStr win(L"abcdef"); + std::basic_string w(std::move(win)); + ASSERT_SAME_TYPE(decltype(w), WStr); + assert(w == L"abcdef"); #endif -} -{ // Testing (12) - std::string sin("abc"); - std::basic_string s(std::move(sin), std::allocator{}); - ASSERT_SAME_TYPE(decltype(s), std::string); - assert(s == "abc"); + } + { // Testing (12) + std::string sin("abc"); + std::basic_string s(std::move(sin), std::allocator{}); + ASSERT_SAME_TYPE(decltype(s), std::string); + assert(s == "abc"); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - using WStr = std::basic_string, test_allocator>; - WStr win(L"abcdef"); - std::basic_string w(std::move(win), test_allocator{}); - ASSERT_SAME_TYPE(decltype(w), WStr); - assert(w == L"abcdef"); + using WStr = std::basic_string, test_allocator>; + WStr win(L"abcdef"); + std::basic_string w(std::move(win), test_allocator{}); + ASSERT_SAME_TYPE(decltype(w), WStr); + assert(w == L"abcdef"); #endif -} -{ // Testing (13) w/o allocator - std::basic_string s({'a', 'b', 'c'}); - ASSERT_SAME_TYPE(decltype(s), std::string); - assert(s == "abc"); + } + { // Testing (13) w/o allocator + std::basic_string s({'a', 'b', 'c'}); + ASSERT_SAME_TYPE(decltype(s), std::string); + assert(s == "abc"); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - std::basic_string w({L'a', L'b', L'c'}); - ASSERT_SAME_TYPE(decltype(w), std::wstring); - assert(w == L"abc"); + std::basic_string w({L'a', L'b', L'c'}); + ASSERT_SAME_TYPE(decltype(w), std::wstring); + assert(w == L"abc"); #endif -} -{ // Testing (13) w/ allocator - std::basic_string s({'a', 'b', 'c'}, test_allocator{}); - ASSERT_SAME_TYPE(decltype(s), BStr>); - assert(s == 
"abc"); + } + { // Testing (13) w/ allocator + std::basic_string s({'a', 'b', 'c'}, test_allocator{}); + ASSERT_SAME_TYPE(decltype(s), BStr>); + assert(s == "abc"); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - std::basic_string w({L'a', L'b', L'c'}, test_allocator{}); - ASSERT_SAME_TYPE(decltype(w), BStr>); - assert(w == L"abc"); + std::basic_string w({L'a', L'b', L'c'}, test_allocator{}); + ASSERT_SAME_TYPE(decltype(w), BStr>); + assert(w == L"abc"); #endif -} -{ // Testing (14) w/o allocator - std::string_view sv("abc"); - std::basic_string s(sv); - ASSERT_SAME_TYPE(decltype(s), std::string); - assert(s == "abc"); + } + { // Testing (14) w/o allocator + std::string_view sv("abc"); + std::basic_string s(sv); + ASSERT_SAME_TYPE(decltype(s), std::string); + assert(s == "abc"); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - using Expect = std::basic_string>; - std::basic_string_view> BSV(L"abcdef"); - std::basic_string w(BSV); - ASSERT_SAME_TYPE(decltype(w), Expect); - assert(w == L"abcdef"); + using Expect = std::basic_string>; + std::basic_string_view> BSV(L"abcdef"); + std::basic_string w(BSV); + ASSERT_SAME_TYPE(decltype(w), Expect); + assert(w == L"abcdef"); #endif -} -{ // Testing (14) w/ allocator - using ExpectS = std::basic_string, test_allocator>; - std::string_view sv("abc"); - std::basic_string s(sv, test_allocator{}); - ASSERT_SAME_TYPE(decltype(s), ExpectS); - assert(s == "abc"); + } + { // Testing (14) w/ allocator + using ExpectS = std::basic_string, test_allocator>; + std::string_view sv("abc"); + std::basic_string s(sv, test_allocator{}); + ASSERT_SAME_TYPE(decltype(s), ExpectS); + assert(s == "abc"); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - using ExpectW = std::basic_string, test_allocator>; - std::basic_string_view> BSV(L"abcdef"); - std::basic_string w(BSV, test_allocator{}); - ASSERT_SAME_TYPE(decltype(w), ExpectW); - assert(w == L"abcdef"); + using ExpectW = std::basic_string, test_allocator>; + std::basic_string_view> BSV(L"abcdef"); + std::basic_string w(BSV, 
test_allocator{}); + ASSERT_SAME_TYPE(decltype(w), ExpectW); + assert(w == L"abcdef"); #endif -} -{ // Testing (15) w/o allocator - std::string s0("abc"); - std::basic_string s(s0, 1, 1); - ASSERT_SAME_TYPE(decltype(s), std::string); - assert(s == "b"); + } + { // Testing (15) w/o allocator + std::string s0("abc"); + std::basic_string s(s0, 1, 1); + ASSERT_SAME_TYPE(decltype(s), std::string); + assert(s == "b"); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - std::wstring w0(L"abcdef"); - std::basic_string w(w0, 2, 2); - ASSERT_SAME_TYPE(decltype(w), std::wstring); - assert(w == L"cd"); + std::wstring w0(L"abcdef"); + std::basic_string w(w0, 2, 2); + ASSERT_SAME_TYPE(decltype(w), std::wstring); + assert(w == L"cd"); #endif -} -{ // Testing (15) w/ allocator - using ExpectS = std::basic_string, test_allocator>; - ExpectS s0("abc"); - std::basic_string s(s0, 1, 1, test_allocator{4}); - ASSERT_SAME_TYPE(decltype(s), ExpectS); - assert(s == "b"); + } + { // Testing (15) w/ allocator + using ExpectS = std::basic_string, test_allocator>; + ExpectS s0("abc"); + std::basic_string s(s0, 1, 1, test_allocator{4}); + ASSERT_SAME_TYPE(decltype(s), ExpectS); + assert(s == "b"); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - using ExpectW = std::basic_string, test_allocator>; - ExpectW w0(L"abcdef"); - std::basic_string w(w0, 2, 2, test_allocator{6}); - ASSERT_SAME_TYPE(decltype(w), ExpectW); - assert(w == L"cd"); + using ExpectW = std::basic_string, test_allocator>; + ExpectW w0(L"abcdef"); + std::basic_string w(w0, 2, 2, test_allocator{6}); + ASSERT_SAME_TYPE(decltype(w), ExpectW); + assert(w == L"cd"); #endif -} + } -return true; + return true; } int main(int, char**) { diff --git a/libcxx/test/std/strings/string.view/string.view.cons/implicit_deduction_guides.pass.cpp b/libcxx/test/std/strings/string.view/string.view.cons/implicit_deduction_guides.pass.cpp index 534b024480feb..53caed55064fa 100644 --- a/libcxx/test/std/strings/string.view/string.view.cons/implicit_deduction_guides.pass.cpp +++ 
b/libcxx/test/std/strings/string.view/string.view.cons/implicit_deduction_guides.pass.cpp @@ -27,9 +27,10 @@ // (4) basic_string_view(const CharT*) int main(int, char**) { { - // Testing (1) - // Nothing to do. Cannot deduce without any arguments. - } { // Testing (2) + // Testing (1) + // Nothing to do. Cannot deduce without any arguments. + } + { // Testing (2) const std::string_view sin("abc"); std::basic_string_view s(sin); ASSERT_SAME_TYPE(decltype(s), std::string_view); diff --git a/libcxx/test/support/counting_projection.h b/libcxx/test/support/counting_projection.h index 1af2c80f244d8..ad549c749ae42 100644 --- a/libcxx/test/support/counting_projection.h +++ b/libcxx/test/support/counting_projection.h @@ -26,7 +26,7 @@ class counting_projection { constexpr counting_projection(Proj proj, int& count) : proj_(std::move(proj)), count_(&count) {} template - constexpr decltype(auto) operator()(T&& value) const { + constexpr decltype(auto) operator()(T && value) const { ++(*count_); return std::invoke(proj_, std::forward(value)); } diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index ebfb35eee91e1..7a125d16af594 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -26,7 +26,7 @@ env: # LLVM POST-BRANCH bump version # LLVM POST-BRANCH add compiler test for ToT - 1, e.g. "Clang 17" # LLVM RELEASE bump remove compiler ToT - 3, e.g. "Clang 15" - LLVM_STABLE_VERSION: "16" # Used for tooling, update after the RELEASE. + LLVM_STABLE_VERSION: "17" # Used for tooling, update after the RELEASE. LLVM_HEAD_VERSION: "18" # Used compiler, update POST-BRANCH. 
GCC_STABLE_VERSION: "13" steps: diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt index 01038c75f4e13..34ab4004ece37 100644 --- a/libcxx/utils/data/ignore_format.txt +++ b/libcxx/utils/data/ignore_format.txt @@ -5265,7 +5265,6 @@ libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/ba libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/ctor.default.pass.cpp libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/ctor.outer_iterator.pass.cpp libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/equal.pass.cpp -libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/increment.pass.cpp libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/types.compile.pass.cpp @@ -5275,11 +5274,8 @@ libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.outer/ct libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.outer/equal.pass.cpp libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.outer/types.compile.pass.cpp libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.outer.value/ctor.default.pass.cpp -libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.outer.value/view_interface.pass.cpp libcxx/test/std/ranges/range.adaptors/range.lazy.split/types.h -libcxx/test/std/ranges/range.adaptors/range.lazy.split/view_interface.pass.cpp libcxx/test/std/ranges/range.adaptors/range.reverse/adaptor.pass.cpp -libcxx/test/std/ranges/range.adaptors/range.reverse/base.pass.cpp libcxx/test/std/ranges/range.adaptors/range.reverse/begin.pass.cpp libcxx/test/std/ranges/range.adaptors/range.reverse/borrowing.compile.pass.cpp 
libcxx/test/std/ranges/range.adaptors/range.reverse/ctad.compile.pass.cpp @@ -5300,7 +5296,6 @@ libcxx/test/std/ranges/range.adaptors/range.take/ctad.compile.pass.cpp libcxx/test/std/ranges/range.adaptors/range.take/ctor.default.pass.cpp libcxx/test/std/ranges/range.adaptors/range.take/range_concept_conformance.compile.pass.cpp libcxx/test/std/ranges/range.adaptors/range.take/sentinel/ctor.pass.cpp -libcxx/test/std/ranges/range.adaptors/range.take/sentinel/eq.pass.cpp libcxx/test/std/ranges/range.adaptors/range.take/size.pass.cpp libcxx/test/std/ranges/range.adaptors/range.take/types.h libcxx/test/std/ranges/range.adaptors/range.take.while/adaptor.pass.cpp @@ -5357,7 +5352,6 @@ libcxx/test/std/ranges/range.factories/range.iota.view/iterator/ctor.value.pass. libcxx/test/std/ranges/range.factories/range.iota.view/iterator/decrement.pass.cpp libcxx/test/std/ranges/range.factories/range.iota.view/iterator/increment.pass.cpp libcxx/test/std/ranges/range.factories/range.iota.view/iterator/member_typedefs.compile.pass.cpp -libcxx/test/std/ranges/range.factories/range.iota.view/iterator/minus.pass.cpp libcxx/test/std/ranges/range.factories/range.iota.view/iterator/star.pass.cpp libcxx/test/std/ranges/range.factories/range.iota.view/iterator/subscript.pass.cpp libcxx/test/std/ranges/range.factories/range.iota.view/range_concept_conformance.compile.pass.cpp From ab6c3d50345f7751f77c16d0909b17e942a3def7 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 12 Oct 2023 14:45:45 -0700 Subject: [PATCH 029/720] [AMDGPU] Change the representation of double literals in operands (#68740) A 64-bit literal can be used as a 32-bit zero or sign extended operand. In case of double zeroes are added to the low 32 bits. Currently asm parser stores only high 32 bits of a double into an operand. 
To support codegen as requested by the https://github.com/llvm/llvm-project/issues/67781 we need to change the representation to store a full 64-bit value so that codegen can simply add immediates to an instruction. There is some code to support compatibility with existing tests and asm kernels. We allow to use short hex strings to represent only a high 32 bit of a double value as a valid literal. --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 21 ++++++++++++--- .../Disassembler/AMDGPUDisassembler.cpp | 27 ++++++++++++++----- .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 9 ++++--- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 15 +++++++---- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 2 +- .../MCTargetDesc/AMDGPUMCCodeEmitter.cpp | 5 +++- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 4 ++- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 7 +++++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 +++ 9 files changed, 71 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 1e07e8deb560f..e16fed445b9f9 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2141,9 +2141,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo const_cast(AsmParser)->Warning(Inst.getLoc(), "Can't encode literal as exact 64-bit floating-point operand. " "Low 32-bits will be set to zero"); + Val &= 0xffffffff00000000u; } - Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); + Inst.addOperand(MCOperand::createImm(Val)); setImmKindLiteral(); return; } @@ -2242,7 +2243,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo return; } - Inst.addOperand(MCOperand::createImm(Lo_32(Val))); + Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? 
Val << 32 : Lo_32(Val); + + Inst.addOperand(MCOperand::createImm(Val)); setImmKindLiteral(); return; @@ -4309,7 +4312,19 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, continue; if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { - uint32_t Value = static_cast(MO.getImm()); + uint64_t Value = static_cast(MO.getImm()); + bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) && + AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8; + bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64); + + if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) { + Error(getLitLoc(Operands), "invalid operand for instruction"); + return false; + } + + if (IsFP64 && IsValid32Op) + Value = Hi_32(Value); + if (NumLiterals == 0 || LiteralValue != Value) { LiteralValue = Value; ++NumLiterals; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index a504a5e86760b..d74fd0b3a9ea7 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -378,6 +378,15 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm, return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256)); } +static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const MCDisassembler *Decoder) { + assert(Imm < (1 << 9) && "9-bit encoding"); + auto DAsm = static_cast(Decoder); + return addOperand( + Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true)); +} + static DecodeStatus DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) { @@ -1219,7 +1228,7 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { return MCOperand::createImm(Literal); } -MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { +MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const { // For now all 
literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants // ToDo: deal with float/double constants @@ -1229,9 +1238,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { Twine(Bytes.size())); } HasLiteral = true; - Literal = eatBytes(Bytes); + Literal = Literal64 = eatBytes(Bytes); + if (ExtendFP64) + Literal64 <<= 32; } - return MCOperand::createImm(Literal); + return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal); } MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) { @@ -1448,7 +1459,7 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const { MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral, - unsigned ImmWidth) const { + unsigned ImmWidth, bool IsFP) const { using namespace AMDGPU::EncValues; assert(Val < 1024); // enum10 @@ -1460,13 +1471,15 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, return createRegOperand(IsAGPR ? getAgprClassId(Width) : getVgprClassId(Width), Val - VGPR_MIN); } - return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth); + return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth, + IsFP); } MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral, - unsigned ImmWidth) const { + unsigned ImmWidth, + bool IsFP) const { // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been // decoded earlier. 
assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0"); @@ -1494,7 +1507,7 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, // Keep a sentinel value for deferred setting return MCOperand::createImm(LITERAL_CONST); else - return decodeLiteralConstant(); + return decodeLiteralConstant(IsFP && ImmWidth == 64); } switch (Width) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 5f3b277d577ff..91b73b593d616 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -97,6 +97,7 @@ class AMDGPUDisassembler : public MCDisassembler { const unsigned TargetMaxInstBytes; mutable ArrayRef Bytes; mutable uint32_t Literal; + mutable uint64_t Literal64; mutable bool HasLiteral; mutable std::optional EnableWavefrontSize32; @@ -229,15 +230,15 @@ class AMDGPUDisassembler : public MCDisassembler { static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm); MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const; - MCOperand decodeLiteralConstant() const; + MCOperand decodeLiteralConstant(bool ExtendFP64) const; MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val, - bool MandatoryLiteral = false, - unsigned ImmWidth = 0) const; + bool MandatoryLiteral = false, unsigned ImmWidth = 0, + bool IsFP = false) const; MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral = false, - unsigned ImmWidth = 0) const; + unsigned ImmWidth = 0, bool IsFP = false) const; MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index ad4c48a8d6558..9459ee088ddde 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -426,7 +426,7 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI, - raw_ostream &O) { + raw_ostream &O, bool IsFP) { int64_t SImm = static_cast(Imm); if (SImm >= -16 && SImm <= 64) { O << SImm; @@ -454,7 +454,10 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, else if (Imm == 0x3fc45f306dc9c882 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm)) O << "0.15915494309189532"; - else { + else if (IsFP) { + assert(AMDGPU::isValid32BitLiteral(Imm, true)); + O << formatHex(static_cast(Hi_32(Imm))); + } else { assert(isUInt<32>(Imm) || isInt<32>(Imm)); // In rare situations, we will have a 32-bit literal in a 64-bit @@ -605,11 +608,13 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo, printImmediate32(Op.getImm(), STI, O); break; case AMDGPU::OPERAND_REG_IMM_INT64: - case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_INLINE_C_INT64: + printImmediate64(Op.getImm(), STI, O, false); + break; + case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_AC_FP64: - printImmediate64(Op.getImm(), STI, O); + printImmediate64(Op.getImm(), STI, O, true); break; case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_AC_INT16: @@ -671,7 +676,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo, if (RCBits == 32) printImmediate32(llvm::bit_cast((float)Value), STI, O); else if (RCBits == 64) - printImmediate64(llvm::bit_cast(Value), STI, O); + printImmediate64(llvm::bit_cast(Value), STI, O, true); else llvm_unreachable("Invalid register class size"); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 3b14faab136b3..dc83547a4afe0 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -91,7 +91,7 @@ class AMDGPUInstPrinter : public MCInstPrinter { void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI, - raw_ostream &O); + raw_ostream &O, bool IsFP); void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printRegularOperand(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp index 21243f80e0554..d93f747bf6f0a 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -411,7 +411,10 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI, } else if (!Op.isExpr()) // Exprs will be replaced with a fixup value. llvm_unreachable("Must be immediate or expr"); - support::endian::write(CB, Imm, llvm::endianness::little); + if (Desc.operands()[i].OperandType == AMDGPU::OPERAND_REG_IMM_FP64) + Imm = Hi_32(Imm); + + support::endian::write(CB, Imm, support::endianness::little); // Only one literal value allowed break; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index c3c5bfae405aa..ea06e85fb400c 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1263,7 +1263,9 @@ def VSrc_f32 : RegOrF32 <"VS_32", "OPERAND_REG_IMM">; def VSrc_v2b16 : RegOrV2B16 <"VS_32", "OPERAND_REG_IMM">; def VSrc_v2f16 : RegOrV2F16 <"VS_32", "OPERAND_REG_IMM">; def VSrc_b64 : RegOrB64 <"VS_64", "OPERAND_REG_IMM">; -def VSrc_f64 : RegOrF64 <"VS_64", "OPERAND_REG_IMM">; +def VSrc_f64 : RegOrF64 <"VS_64", "OPERAND_REG_IMM"> { + let DecoderMethod = "decodeOperand_VSrc_f64"; +} def VSrc_v2b32 : RegOrV2B32 <"VS_64", "OPERAND_REG_IMM">; def VSrc_v2f32 : RegOrV2F32 <"VS_64", "OPERAND_REG_IMM">; 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 6d0ad763d9e6c..d123b384a27d4 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2519,6 +2519,13 @@ bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) { return Lo16 == Hi16; } +bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { + if (IsFP64) + return !(Val & 0xffffffffu); + + return isUInt<32>(Val) || isInt<32>(Val); +} + bool isArgPassedInSGPR(const Argument *A) { const Function *F = A->getParent(); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 297a69f54d637..bb2964f592f66 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1290,6 +1290,9 @@ bool isInlinableIntLiteralV216(int32_t Literal); LLVM_READNONE bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi); +LLVM_READNONE +bool isValid32BitLiteral(uint64_t Val, bool IsFP64); + bool isArgPassedInSGPR(const Argument *Arg); bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo); From 8da1e3dd24a1cc6bc99bf3334009d2d19f21018f Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Thu, 12 Oct 2023 18:05:49 -0400 Subject: [PATCH 030/720] [Driver] Have -rdynamic be a no-op on Haiku (#67872) Do the same as the Haiku GCC patches. 
https://github.com/haikuports/haikuports/commit/46afdec05771d126eb6cb6c3b3deb957604617c4 --- clang/lib/Driver/ToolChains/Haiku.cpp | 5 +++-- clang/test/Driver/haiku.c | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Haiku.cpp b/clang/lib/Driver/ToolChains/Haiku.cpp index 1985fed9cf32a..c2653a4a2022e 100644 --- a/clang/lib/Driver/ToolChains/Haiku.cpp +++ b/clang/lib/Driver/ToolChains/Haiku.cpp @@ -42,6 +42,9 @@ void haiku::Linker::ConstructJob(Compilation &C, const JobAction &JA, // Silence warning for "clang -pie foo.o -o foo" Args.ClaimAllArgs(options::OPT_pie); + // -rdynamic is a no-op with Haiku. Claim argument to avoid warning. + Args.ClaimAllArgs(options::OPT_rdynamic); + if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); @@ -49,8 +52,6 @@ void haiku::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (Static) { CmdArgs.push_back("-Bstatic"); } else { - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); if (Shared) CmdArgs.push_back("-shared"); CmdArgs.push_back("--enable-new-dtags"); diff --git a/clang/test/Driver/haiku.c b/clang/test/Driver/haiku.c index 3888c67329232..965d3cf97bc36 100644 --- a/clang/test/Driver/haiku.c +++ b/clang/test/Driver/haiku.c @@ -56,6 +56,11 @@ // CHECK-LD-X86_64-SAME: {{^}} "[[SYSROOT]]/boot/system/develop/tools/lib/gcc/x86_64-unknown-haiku/13.2.0/crtendS.o" // CHECK-LD-X86_64-SAME: {{^}} "[[SYSROOT]]/boot/system/develop/lib/crtn.o" +// Check -rdynamic is a no-op +// RUN: %clang -### -rdynamic %s 2>&1 --target=x86_64-unknown-haiku \ +// RUN: | FileCheck --check-prefix=CHECK-RDYNAMIC %s +// CHECK-RDYNAMIC-NOT: "-export-dynamic" + // Check the right flags are present with -shared // RUN: %clang -### %s -shared 2>&1 --target=x86_64-unknown-haiku \ // RUN: --gcc-toolchain="" \ From b90fcafcd68f77c86f18ecd812fb92961afbb3ba Mon Sep 17 00:00:00 2001 From: spupyrev Date: Thu, 12 Oct 2023 15:07:43 -0700 Subject: 
[PATCH 031/720] [CodeLayout][NFC] Using MergedVector to avoid extra vector allocations (#68724) Using a wrapper (MergedVector) around vectors to avoid extra vector allocations. Plus a few edits in the comments. --- llvm/lib/Transforms/Utils/CodeLayout.cpp | 139 +++++++++++++---------- 1 file changed, 77 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp index 057a5e86c04ac..dea91dcac21ae 100644 --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -99,7 +99,7 @@ static cl::opt BackwardDistance( cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP")); // The maximum size of a chain created by the algorithm. The size is bounded -// so that the algorithm can efficiently process extremely large instance. +// so that the algorithm can efficiently process extremely large instances. static cl::opt MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096), cl::desc("The maximum size of a chain to create.")); @@ -217,8 +217,8 @@ struct NodeT { NodeT &operator=(const NodeT &) = delete; NodeT &operator=(NodeT &&) = default; - explicit NodeT(size_t Index, uint64_t Size, uint64_t EC) - : Index(Index), Size(Size), ExecutionCount(EC) {} + explicit NodeT(size_t Index, uint64_t Size, uint64_t Count) + : Index(Index), Size(Size), ExecutionCount(Count) {} bool isEntry() const { return Index == 0; } @@ -477,12 +477,12 @@ void ChainT::mergeEdges(ChainT *Other) { using NodeIter = std::vector::const_iterator; -/// A wrapper around three chains of nodes; it is used to avoid extra -/// instantiation of the vectors. -struct MergedChain { - MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(), - NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(), - NodeIter End3 = NodeIter()) +/// A wrapper around three concatenated vectors (chains) of nodes; it is used +/// to avoid extra instantiation of the vectors. 
+struct MergedNodesT { + MergedNodesT(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(), + NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(), + NodeIter End3 = NodeIter()) : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3), End3(End3) {} @@ -507,6 +507,8 @@ struct MergedChain { const NodeT *getFirstNode() const { return *Begin1; } + bool empty() const { return Begin1 == End1; } + private: NodeIter Begin1; NodeIter End1; @@ -516,14 +518,34 @@ struct MergedChain { NodeIter End3; }; +/// A wrapper around two concatenated vectors (chains) of jumps. +struct MergedJumpsT { + MergedJumpsT(const std::vector *Jumps1, + const std::vector *Jumps2 = nullptr) { + assert(!Jumps1->empty() && "cannot merge empty jump list"); + JumpArray[0] = Jumps1; + JumpArray[1] = Jumps2; + } + + template void forEach(const F &Func) const { + for (auto Jumps : JumpArray) + if (Jumps != nullptr) + for (JumpT *Jump : *Jumps) + Func(Jump); + } + +private: + std::array *, 2> JumpArray{nullptr, nullptr}; +}; + /// Merge two chains of nodes respecting a given 'type' and 'offset'. /// /// If MergeType == 0, then the result is a concatenation of two chains. /// Otherwise, the first chain is cut into two sub-chains at the offset, /// and merged using all possible ways of concatenating three chains. -MergedChain mergeNodes(const std::vector &X, - const std::vector &Y, size_t MergeOffset, - MergeTypeT MergeType) { +MergedNodesT mergeNodes(const std::vector &X, + const std::vector &Y, size_t MergeOffset, + MergeTypeT MergeType) { // Split the first chain, X, into X1 and X2. NodeIter BeginX1 = X.begin(); NodeIter EndX1 = X.begin() + MergeOffset; @@ -535,15 +557,15 @@ MergedChain mergeNodes(const std::vector &X, // Construct a new chain from the three existing ones. 
switch (MergeType) { case MergeTypeT::X_Y: - return MergedChain(BeginX1, EndX2, BeginY, EndY); + return MergedNodesT(BeginX1, EndX2, BeginY, EndY); case MergeTypeT::Y_X: - return MergedChain(BeginY, EndY, BeginX1, EndX2); + return MergedNodesT(BeginY, EndY, BeginX1, EndX2); case MergeTypeT::X1_Y_X2: - return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2); + return MergedNodesT(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2); case MergeTypeT::Y_X2_X1: - return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1); + return MergedNodesT(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1); case MergeTypeT::X2_X1_Y: - return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY); + return MergedNodesT(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY); } llvm_unreachable("unexpected chain merge type"); } @@ -618,6 +640,7 @@ class ExtTSPImpl { AllChains.reserve(NumNodes); HotChains.reserve(NumNodes); for (NodeT &Node : AllNodes) { + // Create a chain. AllChains.emplace_back(Node.Index, &Node); Node.CurChain = &AllChains.back(); if (Node.ExecutionCount > 0) @@ -630,13 +653,13 @@ class ExtTSPImpl { for (JumpT *Jump : PredNode.OutJumps) { NodeT *SuccNode = Jump->Target; ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain); - // this edge is already present in the graph. + // This edge is already present in the graph. if (CurEdge != nullptr) { assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr); CurEdge->appendJump(Jump); continue; } - // this is a new edge. + // This is a new edge. AllEdges.emplace_back(Jump); PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back()); SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back()); @@ -649,7 +672,7 @@ class ExtTSPImpl { /// to B are from A. Such nodes should be adjacent in the optimal ordering; /// the method finds and merges such pairs of nodes. void mergeForcedPairs() { - // Find fallthroughs based on edge weights. + // Find forced pairs of blocks. 
for (NodeT &Node : AllNodes) { if (SuccNodes[Node.Index].size() == 1 && PredNodes[SuccNodes[Node.Index][0]].size() == 1 && @@ -699,9 +722,7 @@ class ExtTSPImpl { /// Deterministically compare pairs of chains. auto compareChainPairs = [](const ChainT *A1, const ChainT *B1, const ChainT *A2, const ChainT *B2) { - if (A1 != A2) - return A1->Id < A2->Id; - return B1->Id < B2->Id; + return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id); }; while (HotChains.size() > 1) { @@ -769,24 +790,22 @@ class ExtTSPImpl { } /// Compute the Ext-TSP score for a given node order and a list of jumps. - double extTSPScore(const MergedChain &MergedBlocks, - const std::vector &Jumps) const { - if (Jumps.empty()) - return 0.0; + double extTSPScore(const MergedNodesT &Nodes, + const MergedJumpsT &Jumps) const { uint64_t CurAddr = 0; - MergedBlocks.forEach([&](const NodeT *Node) { + Nodes.forEach([&](const NodeT *Node) { Node->EstimatedAddr = CurAddr; CurAddr += Node->Size; }); double Score = 0; - for (JumpT *Jump : Jumps) { + Jumps.forEach([&](const JumpT *Jump) { const NodeT *SrcBlock = Jump->Source; const NodeT *DstBlock = Jump->Target; Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size, DstBlock->EstimatedAddr, Jump->ExecutionCount, Jump->IsConditional); - } + }); return Score; } @@ -798,17 +817,13 @@ class ExtTSPImpl { /// element being the corresponding merging type. MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc, ChainEdge *Edge) const { - if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) { + if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) return Edge->getCachedMergeGain(ChainPred, ChainSucc); - } + assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps"); // Precompute jumps between ChainPred and ChainSucc. 
- auto Jumps = Edge->jumps(); ChainEdge *EdgePP = ChainPred->getEdge(ChainPred); - if (EdgePP != nullptr) { - Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end()); - } - assert(!Jumps.empty() && "trying to merge chains w/o jumps"); + MergedJumpsT Jumps(&Edge->jumps(), EdgePP ? &EdgePP->jumps() : nullptr); // This object holds the best chosen gain of merging two chains. MergeGainT Gain = MergeGainT(); @@ -875,19 +890,20 @@ class ExtTSPImpl { /// /// The two chains are not modified in the method. MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc, - const std::vector &Jumps, - size_t MergeOffset, MergeTypeT MergeType) const { - auto MergedBlocks = + const MergedJumpsT &Jumps, size_t MergeOffset, + MergeTypeT MergeType) const { + MergedNodesT MergedNodes = mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType); // Do not allow a merge that does not preserve the original entry point. if ((ChainPred->isEntry() || ChainSucc->isEntry()) && - !MergedBlocks.getFirstNode()->isEntry()) + !MergedNodes.getFirstNode()->isEntry()) return MergeGainT(); // The gain for the new chain. - auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score; - return MergeGainT(NewGainScore, MergeOffset, MergeType); + double NewScore = extTSPScore(MergedNodes, Jumps); + double CurScore = ChainPred->Score; + return MergeGainT(NewScore - CurScore, MergeOffset, MergeType); } /// Merge chain From into chain Into, update the list of active chains, @@ -897,7 +913,7 @@ class ExtTSPImpl { assert(Into != From && "a chain cannot be merged with itself"); // Merge the nodes. - MergedChain MergedNodes = + MergedNodesT MergedNodes = mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType); Into->merge(From, MergedNodes.getNodes()); @@ -908,8 +924,9 @@ class ExtTSPImpl { // Update cached ext-tsp score for the new chain. 
ChainEdge *SelfEdge = Into->getEdge(Into); if (SelfEdge != nullptr) { - MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end()); - Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps()); + MergedNodes = MergedNodesT(Into->Nodes.begin(), Into->Nodes.end()); + MergedJumpsT MergedJumps(&SelfEdge->jumps()); + Into->Score = extTSPScore(MergedNodes, MergedJumps); } // Remove the chain from the list of active chains. @@ -943,7 +960,7 @@ class ExtTSPImpl { // Sorting chains by density in the decreasing order. std::sort(SortedChains.begin(), SortedChains.end(), [&](const ChainT *L, const ChainT *R) { - // Place the entry point is at the beginning of the order. + // Place the entry point at the beginning of the order. if (L->isEntry() != R->isEntry()) return L->isEntry(); @@ -1163,9 +1180,9 @@ class CDSortImpl { /// result is a pair with the first element being the gain and the second /// element being the corresponding merging type. MergeGainT getBestMergeGain(ChainEdge *Edge) const { + assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps"); // Precompute jumps between ChainPred and ChainSucc. - auto Jumps = Edge->jumps(); - assert(!Jumps.empty() && "trying to merge chains w/o jumps"); + MergedJumpsT Jumps(&Edge->jumps()); ChainT *SrcChain = Edge->srcChain(); ChainT *DstChain = Edge->dstChain(); @@ -1204,7 +1221,7 @@ class CDSortImpl { /// /// The two chains are not modified in the method. MergeGainT computeMergeGain(ChainT *ChainPred, ChainT *ChainSucc, - const std::vector &Jumps, + const MergedJumpsT &Jumps, MergeTypeT MergeType) const { // This doesn't depend on the ordering of the nodes double FreqGain = freqBasedLocalityGain(ChainPred, ChainSucc); @@ -1255,24 +1272,22 @@ class CDSortImpl { } /// Compute the change of the distance locality after merging the chains. 
- double distBasedLocalityGain(const MergedChain &MergedBlocks, - const std::vector &Jumps) const { - if (Jumps.empty()) - return 0.0; + double distBasedLocalityGain(const MergedNodesT &Nodes, + const MergedJumpsT &Jumps) const { uint64_t CurAddr = 0; - MergedBlocks.forEach([&](const NodeT *Node) { + Nodes.forEach([&](const NodeT *Node) { Node->EstimatedAddr = CurAddr; CurAddr += Node->Size; }); double CurScore = 0; double NewScore = 0; - for (const JumpT *Arc : Jumps) { - uint64_t SrcAddr = Arc->Source->EstimatedAddr + Arc->Offset; - uint64_t DstAddr = Arc->Target->EstimatedAddr; - NewScore += distScore(SrcAddr, DstAddr, Arc->ExecutionCount); - CurScore += distScore(0, TotalSize, Arc->ExecutionCount); - } + Jumps.forEach([&](const JumpT *Jump) { + uint64_t SrcAddr = Jump->Source->EstimatedAddr + Jump->Offset; + uint64_t DstAddr = Jump->Target->EstimatedAddr; + NewScore += distScore(SrcAddr, DstAddr, Jump->ExecutionCount); + CurScore += distScore(0, TotalSize, Jump->ExecutionCount); + }); return NewScore - CurScore; } @@ -1283,7 +1298,7 @@ class CDSortImpl { assert(Into != From && "a chain cannot be merged with itself"); // Merge the nodes. - MergedChain MergedNodes = + MergedNodesT MergedNodes = mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType); Into->merge(From, MergedNodes.getNodes()); From 46cb8d9a325233ac11ed5e90367c43774294d87e Mon Sep 17 00:00:00 2001 From: AdityaK <1894981+hiraditya@users.noreply.github.com> Date: Thu, 12 Oct 2023 16:03:07 -0700 Subject: [PATCH 032/720] [TSAN] add support for riscv64 (#68735) Implements for sv39 and sv48 VMA layout. Userspace only has access to the bottom half of vma range. The top half is used by kernel. There is no dedicated vsyscall or heap segment. PIE program is allocated to start at TASK_SIZE/3*2. Maximum ASLR is ARCH_MMAP_RND_BITS_MAX+PAGE_SHIFT=24+12=36 Loader, vdso and other libraries are allocated below stack from the top. 
Also change RestoreAddr to use 4 bits to accommodate MappingRiscv64_48 Reviewed by: MaskRay, dvyukov, asb, StephenFan, luismarques, jrtc27, hiraditya, vitalybuka Differential Revision: https://reviews.llvm.org/D145214 D145214 was reverted because one file was missing in the latest commit. Luckily the file was there in the previous commit, probably the author missed uploading that file with latest commit. Co-authored-by: Alex Fan --- clang/lib/Driver/ToolChains/Linux.cpp | 2 +- .../cmake/Modules/AllSupportedArchDefs.cmake | 2 +- .../lib/sanitizer_common/sanitizer_platform.h | 2 +- compiler-rt/lib/tsan/rtl/CMakeLists.txt | 4 + .../lib/tsan/rtl/tsan_interceptors_posix.cpp | 2 + compiler-rt/lib/tsan/rtl/tsan_platform.h | 76 ++++++- .../lib/tsan/rtl/tsan_platform_linux.cpp | 34 ++- compiler-rt/lib/tsan/rtl/tsan_rtl.h | 4 +- compiler-rt/lib/tsan/rtl/tsan_rtl_riscv64.S | 203 ++++++++++++++++++ compiler-rt/test/tsan/map32bit.cpp | 1 + compiler-rt/test/tsan/mmap_large.cpp | 3 +- compiler-rt/test/tsan/test.h | 2 + 12 files changed, 318 insertions(+), 17 deletions(-) create mode 100644 compiler-rt/lib/tsan/rtl/tsan_rtl_riscv64.S diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 1ba222bf83b10..735af54f114ce 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -801,7 +801,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64) Res |= SanitizerKind::Leak; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ || - IsLoongArch64) + IsLoongArch64 || IsRISCV64) Res |= SanitizerKind::Thread; if (IsX86_64 || IsSystemZ) Res |= SanitizerKind::KernelMemory; diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index e8ab660c1d83c..416777171d2ca 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ 
b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -66,7 +66,7 @@ set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${RISCV32} ${RISCV64} ${LOONGARCH64}) set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X} - ${LOONGARCH64}) + ${LOONGARCH64} ${RISCV64}) set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64} ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${LOONGARCH64}) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index c1ca5c9ca4478..5280416f8bd30 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -303,7 +303,7 @@ # define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 40) # endif #elif SANITIZER_RISCV64 -# define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 38) +# define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 47) #elif defined(__aarch64__) # if SANITIZER_APPLE # if SANITIZER_OSX || SANITIZER_IOSSIM diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt index 7b18d379e9197..791c0596f65ab 100644 --- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt +++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt @@ -220,6 +220,10 @@ else() set(TSAN_ASM_SOURCES tsan_rtl_mips64.S ) + elseif(arch MATCHES "riscv64") + set(TSAN_ASM_SOURCES + tsan_rtl_riscv64.S + ) elseif(arch MATCHES "s390x") set(TSAN_ASM_SOURCES tsan_rtl_s390x.S diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index 5add97ccd17a3..80f86ca98ed9c 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -81,6 +81,8 @@ struct ucontext_t { #define PTHREAD_ABI_BASE "GLIBC_2.17" 
#elif SANITIZER_LOONGARCH64 #define PTHREAD_ABI_BASE "GLIBC_2.36" +#elif SANITIZER_RISCV64 +# define PTHREAD_ABI_BASE "GLIBC_2.27" #endif extern "C" int pthread_attr_init(void *attr); diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h index f0cdaf48eaa31..cfbb57d1d8d8d 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform.h +++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h @@ -377,6 +377,71 @@ struct MappingPPC64_47 { static const uptr kMidAppMemEnd = 0; }; +/* +C/C++ on linux/riscv64 (39-bit VMA) +0000 0010 00 - 0200 0000 00: main binary ( 8 GB) +0200 0000 00 - 1000 0000 00: - +1000 0000 00 - 4000 0000 00: shadow memory (64 GB) +4000 0000 00 - 4800 0000 00: metainfo (16 GB) +4800 0000 00 - 5500 0000 00: - +5500 0000 00 - 5a00 0000 00: main binary (PIE) (~8 GB) +5600 0000 00 - 7c00 0000 00: - +7d00 0000 00 - 7fff ffff ff: libraries and main thread stack ( 8 GB) + +mmap by default allocates from top downwards +VDSO sits below loader and above dynamic libraries, within HiApp region. +Heap starts after program region whose position depends on pie or non-pie. +Disable tracking them since their locations are not fixed. 
+*/ +struct MappingRiscv64_39 { + static const uptr kLoAppMemBeg = 0x0000001000ull; + static const uptr kLoAppMemEnd = 0x0200000000ull; + static const uptr kShadowBeg = 0x1000000000ull; + static const uptr kShadowEnd = 0x2000000000ull; + static const uptr kMetaShadowBeg = 0x2000000000ull; + static const uptr kMetaShadowEnd = 0x2400000000ull; + static const uptr kMidAppMemBeg = 0x2aaaaaa000ull; + static const uptr kMidAppMemEnd = 0x2c00000000ull; + static const uptr kHeapMemBeg = 0x2c00000000ull; + static const uptr kHeapMemEnd = 0x2c00000000ull; + static const uptr kHiAppMemBeg = 0x3c00000000ull; + static const uptr kHiAppMemEnd = 0x3fffffffffull; + static const uptr kShadowMsk = 0x3800000000ull; + static const uptr kShadowXor = 0x0800000000ull; + static const uptr kShadowAdd = 0x0000000000ull; + static const uptr kVdsoBeg = 0x4000000000ull; +}; + +/* +C/C++ on linux/riscv64 (48-bit VMA) +0000 0000 1000 - 0500 0000 0000: main binary ( 5 TB) +0500 0000 0000 - 2000 0000 0000: - +2000 0000 0000 - 4000 0000 0000: shadow memory (32 TB) +4000 0000 0000 - 4800 0000 0000: metainfo ( 8 TB) +4800 0000 0000 - 5555 5555 5000: - +5555 5555 5000 - 5a00 0000 0000: main binary (PIE) (~5 TB) +5a00 0000 0000 - 7a00 0000 0000: - +7a00 0000 0000 - 7fff ffff ffff: libraries and main thread stack ( 5 TB) +*/ +struct MappingRiscv64_48 { + static const uptr kLoAppMemBeg = 0x000000001000ull; + static const uptr kLoAppMemEnd = 0x050000000000ull; + static const uptr kShadowBeg = 0x200000000000ull; + static const uptr kShadowEnd = 0x400000000000ull; + static const uptr kMetaShadowBeg = 0x400000000000ull; + static const uptr kMetaShadowEnd = 0x480000000000ull; + static const uptr kMidAppMemBeg = 0x555555555000ull; + static const uptr kMidAppMemEnd = 0x5a0000000000ull; + static const uptr kHeapMemBeg = 0x5a0000000000ull; + static const uptr kHeapMemEnd = 0x5a0000000000ull; + static const uptr kHiAppMemBeg = 0x7a0000000000ull; + static const uptr kHiAppMemEnd = 0x7fffffffffffull; + static const 
uptr kShadowMsk = 0x700000000000ull; + static const uptr kShadowXor = 0x100000000000ull; + static const uptr kShadowAdd = 0x000000000000ull; + static const uptr kVdsoBeg = 0x800000000000ull; +}; + /* C/C++ on linux/s390x While the kernel provides a 64-bit address space, we have to restrict ourselves @@ -665,6 +730,13 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) { } # elif defined(__mips64) return Func::template Apply(arg); +# elif SANITIZER_RISCV64 + switch (vmaSize) { + case 39: + return Func::template Apply(arg); + case 48: + return Func::template Apply(arg); + } # elif defined(__s390x__) return Func::template Apply(arg); # else @@ -686,6 +758,8 @@ void ForEachMapping() { Func::template Apply(); Func::template Apply(); Func::template Apply(); + Func::template Apply(); + Func::template Apply(); Func::template Apply(); Func::template Apply(); Func::template Apply(); @@ -894,7 +968,7 @@ struct RestoreAddrImpl { Mapping::kMidAppMemEnd, Mapping::kHiAppMemBeg, Mapping::kHiAppMemEnd, Mapping::kHeapMemBeg, Mapping::kHeapMemEnd, }; - const uptr indicator = 0x0e0000000000ull; + const uptr indicator = 0x0f0000000000ull; const uptr ind_lsb = 1ull << LeastSignificantSetBitIndex(indicator); for (uptr i = 0; i < ARRAY_SIZE(ranges); i += 2) { uptr beg = ranges[i]; diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp index d161fa8d217e8..369509ed0a604 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -267,7 +267,17 @@ void InitializePlatformEarly() { Die(); } # endif -#endif +# elif SANITIZER_RISCV64 + // the bottom half of vma is allocated for userspace + vmaSize = vmaSize + 1; +# if !SANITIZER_GO + if (vmaSize != 39 && vmaSize != 48) { + Printf("FATAL: ThreadSanitizer: unsupported VMA range\n"); + Printf("FATAL: Found %zd - Supported 39 and 48\n", vmaSize); + Die(); + } +# endif +# endif } void InitializePlatform() { @@ -399,13 +409,15 @@ static uptr 
UnmangleLongJmpSp(uptr mangled_sp) { return mangled_sp ^ xor_key; #elif defined(__mips__) return mangled_sp; -#elif defined(__s390x__) +# elif SANITIZER_RISCV64 + return mangled_sp; +# elif defined(__s390x__) // tcbhead_t.stack_guard uptr xor_key = ((uptr *)__builtin_thread_pointer())[5]; return mangled_sp ^ xor_key; -#else - #error "Unknown platform" -#endif +# else +# error "Unknown platform" +# endif } #if SANITIZER_NETBSD @@ -429,11 +441,13 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { # define LONG_JMP_SP_ENV_SLOT 1 # elif defined(__mips64) # define LONG_JMP_SP_ENV_SLOT 1 -# elif defined(__s390x__) -# define LONG_JMP_SP_ENV_SLOT 9 -# else -# define LONG_JMP_SP_ENV_SLOT 6 -# endif +# elif SANITIZER_RISCV64 +# define LONG_JMP_SP_ENV_SLOT 13 +# elif defined(__s390x__) +# define LONG_JMP_SP_ENV_SLOT 9 +# else +# define LONG_JMP_SP_ENV_SLOT 6 +# endif #endif uptr ExtractLongJmpSp(uptr *env) { diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h index a5606dbc7f882..de4ea0bb5f487 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -56,8 +56,8 @@ namespace __tsan { #if !SANITIZER_GO struct MapUnmapCallback; -#if defined(__mips64) || defined(__aarch64__) || defined(__loongarch__) || \ - defined(__powerpc__) +# if defined(__mips64) || defined(__aarch64__) || defined(__loongarch__) || \ + defined(__powerpc__) || SANITIZER_RISCV64 struct AP32 { static const uptr kSpaceBeg = 0; diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_riscv64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_riscv64.S new file mode 100644 index 0000000000000..8e6b9b9432ef8 --- /dev/null +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_riscv64.S @@ -0,0 +1,203 @@ +#include "sanitizer_common/sanitizer_asm.h" + +.section .text + +.comm _ZN14__interception11real_setjmpE,8,8 +.globl ASM_SYMBOL_INTERCEPTOR(setjmp) +ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp)) +ASM_SYMBOL_INTERCEPTOR(setjmp): + CFI_STARTPROC + + // Save frame pointer and 
return address register + addi sp, sp, -32 + sd ra, 24(sp) + sd s0, 16(sp) + CFI_DEF_CFA_OFFSET (32) + CFI_OFFSET (1, -8) + CFI_OFFSET (8, -16) + + // Adjust the SP for previous frame + addi s0, sp, 32 + CFI_DEF_CFA_REGISTER (8) + + // Save env parameter + sd a0, 8(sp) + CFI_OFFSET (10, -24) + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi a0, s0, 0 + + // call tsan interceptor + call ASM_SYMBOL(__tsan_setjmp) + + // Restore env parameter + ld a0, 8(sp) + CFI_RESTORE (10) + + // Restore frame/link register + ld s0, 16(sp) + ld ra, 24(sp) + addi sp, sp, 32 + CFI_RESTORE (8) + CFI_RESTORE (1) + CFI_DEF_CFA (2, 0) + + // tail jump to libc setjmp + la t1, _ZN14__interception11real_setjmpE + ld t1, 0(t1) + jr t1 + + CFI_ENDPROC +ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp)) + +.comm _ZN14__interception12real__setjmpE,8,8 +.globl ASM_SYMBOL_INTERCEPTOR(_setjmp) +ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp)) +ASM_SYMBOL_INTERCEPTOR(_setjmp): + CFI_STARTPROC + + // Save frame pointer and return address register + addi sp, sp, -32 + sd ra, 24(sp) + sd s0, 16(sp) + CFI_DEF_CFA_OFFSET (32) + CFI_OFFSET (1, -8) + CFI_OFFSET (8, -16) + + // Adjust the SP for previous frame + addi s0, sp, 32 + CFI_DEF_CFA_REGISTER (8) + + // Save env parameter + sd a0, 8(sp) + CFI_OFFSET (10, -24) + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi a0, s0, 0 + + // call tsan interceptor + call ASM_SYMBOL(__tsan_setjmp) + + // Restore env parameter + ld a0, 8(sp) + CFI_RESTORE (10) + + // Restore frame/link register + ld s0, 16(sp) + ld ra, 24(sp) + addi sp, sp, 32 + CFI_RESTORE (8) + CFI_RESTORE (1) + CFI_DEF_CFA (2, 0) + + // tail jump to libc setjmp + la t1, _ZN14__interception12real__setjmpE + ld t1, 0(t1) + jr t1 + + CFI_ENDPROC +ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_setjmp)) + +.comm _ZN14__interception14real_sigsetjmpE,8,8 +.globl ASM_SYMBOL_INTERCEPTOR(sigsetjmp) +ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp)) 
+ASM_SYMBOL_INTERCEPTOR(sigsetjmp): + CFI_STARTPROC + + // Save frame pointer and return address register + addi sp, sp, -32 + sd ra, 24(sp) + sd s0, 16(sp) + CFI_DEF_CFA_OFFSET (32) + CFI_OFFSET (1, -8) + CFI_OFFSET (8, -16) + + // Adjust the SP for previous frame + addi s0, sp, 32 + CFI_DEF_CFA_REGISTER (8) + + // Save env parameter + sd a0, 8(sp) + sd a1, 0(sp) + CFI_OFFSET (10, -24) + CFI_OFFSET (11, -32) + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi a0, s0, 0 + + // call tsan interceptor + call ASM_SYMBOL(__tsan_setjmp) + + // Restore env parameter + ld a0, 8(sp) + ld a1, 0(sp) + CFI_RESTORE (10) + CFI_RESTORE (11) + + // Restore frame/link register + ld s0, 16(sp) + ld ra, 24(sp) + addi sp, sp, 32 + CFI_RESTORE (8) + CFI_RESTORE (1) + CFI_DEF_CFA (2, 0) + + // tail jump to libc setjmp + la t1, _ZN14__interception14real_sigsetjmpE + ld t1, 0(t1) + jr t1 + + CFI_ENDPROC +ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp)) + +.comm _ZN14__interception16real___sigsetjmpE,8,8 +.globl ASM_SYMBOL_INTERCEPTOR(__sigsetjmp) +ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)) +ASM_SYMBOL_INTERCEPTOR(__sigsetjmp): + CFI_STARTPROC + + // Save frame pointer and return address register + addi sp, sp, -32 + sd ra, 24(sp) + sd s0, 16(sp) + CFI_DEF_CFA_OFFSET (32) + CFI_OFFSET (1, -8) + CFI_OFFSET (8, -16) + + // Adjust the SP for previous frame + addi s0, sp, 32 + CFI_DEF_CFA_REGISTER (8) + + // Save env parameter + sd a0, 8(sp) + sd a1, 0(sp) + CFI_OFFSET (10, -24) + CFI_OFFSET (11, -32) + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi a0, s0, 0 + + // call tsan interceptor + call ASM_SYMBOL(__tsan_setjmp) + + // Restore env parameter + ld a0, 8(sp) + ld a1, 0(sp) + CFI_RESTORE (10) + CFI_RESTORE (11) + + // Restore frame/link register + ld s0, 16(sp) + ld ra, 24(sp) + addi sp, sp, 32 + CFI_RESTORE (8) + CFI_RESTORE (1) + CFI_DEF_CFA (2, 0) + + // tail jump to libc setjmp + la t1, _ZN14__interception16real___sigsetjmpE + ld t1, 
0(t1) + jr t1 + + CFI_ENDPROC +ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)) diff --git a/compiler-rt/test/tsan/map32bit.cpp b/compiler-rt/test/tsan/map32bit.cpp index e8bac22647bb5..9c0760f54b73a 100644 --- a/compiler-rt/test/tsan/map32bit.cpp +++ b/compiler-rt/test/tsan/map32bit.cpp @@ -13,6 +13,7 @@ // XFAIL: target=powerpc64{{.*}} // XFAIL: target=s390x{{.*}} // XFAIL: target=loongarch64{{.*}} +// XFAIL: target=riscv64{{.*}} // MAP_32BIT doesn't exist on OS X and NetBSD. // UNSUPPORTED: darwin,target={{.*netbsd.*}} diff --git a/compiler-rt/test/tsan/mmap_large.cpp b/compiler-rt/test/tsan/mmap_large.cpp index 85ebe7f76b023..a6aca720bf8a1 100644 --- a/compiler-rt/test/tsan/mmap_large.cpp +++ b/compiler-rt/test/tsan/mmap_large.cpp @@ -17,7 +17,8 @@ int main() { #ifdef __x86_64__ const size_t kLog2Size = 39; -#elif defined(__mips64) || defined(__aarch64__) || defined(__loongarch_lp64) +#elif defined(__mips64) || defined(__aarch64__) || \ + defined(__loongarch_lp64) || (defined(__riscv) && __riscv_xlen == 64) const size_t kLog2Size = 32; #elif defined(__powerpc64__) const size_t kLog2Size = 39; diff --git a/compiler-rt/test/tsan/test.h b/compiler-rt/test/tsan/test.h index 7406318243c57..6fd552465823e 100644 --- a/compiler-rt/test/tsan/test.h +++ b/compiler-rt/test/tsan/test.h @@ -76,6 +76,8 @@ unsigned long long monotonic_clock_ns() { const int kPCInc = 1; #elif defined(__sparc__) || defined(__mips__) const int kPCInc = 8; +#elif defined(__riscv) && __riscv_xlen == 64 +const int kPCInc = 2; #else const int kPCInc = 4; #endif From 282ea28f18cf01b350c91450be60e82fff7cbf42 Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Thu, 12 Oct 2023 16:24:36 -0700 Subject: [PATCH 033/720] [mlir][nvvm] Fix circular dependency in (#68934) BasicPtxBuilder includes NVVMDialect and vice versa. Cmake appereantly forgives that, but this causes bazel build fails. 
This PR aims to fix that --- mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp b/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp index eeedccf3ba3fc..121504fc20c01 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.h" -#include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Support/LogicalResult.h" #define DEBUG_TYPE "ptx-builder" @@ -28,6 +27,8 @@ using namespace mlir; using namespace NVVM; +static constexpr int64_t kSharedMemorySpace = 3; + static char getRegisterType(Type type) { if (type.isInteger(1)) return 'b'; @@ -43,7 +44,7 @@ static char getRegisterType(Type type) { return 'd'; if (auto ptr = type.dyn_cast()) { // Shared address spaces is addressed with 32-bit pointers. - if (ptr.getAddressSpace() == NVVM::kSharedMemorySpace) { + if (ptr.getAddressSpace() == kSharedMemorySpace) { return 'r'; } return 'l'; From cd0d478e7cfa4ecf44c6fa97c796678cea5e4256 Mon Sep 17 00:00:00 2001 From: Tom Yang Date: Thu, 12 Oct 2023 16:17:26 -0700 Subject: [PATCH 034/720] quick fix for TestDumpDwo PR#66035 introduced a test failure that causes windows build bots to fail. These unit tests shouldn't be running on Windows. 
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: --- .../target/dump-separate-debug-info/dwo/TestDumpDwo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py index c58ffdefb4587..3d9d8e8e77adb 100644 --- a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py +++ b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py @@ -25,6 +25,7 @@ def get_dwos_from_json(self): @skipIfRemote @skipIfDarwin + @skipIfWindows def test_dwos_loaded_json_output(self): self.build() exe = self.getBuildArtifact("a.out") @@ -47,6 +48,7 @@ def test_dwos_loaded_json_output(self): @skipIfRemote @skipIfDarwin + @skipIfWindows def test_dwos_not_loaded_json_output(self): self.build() exe = self.getBuildArtifact("a.out") @@ -71,6 +73,7 @@ def test_dwos_not_loaded_json_output(self): @skipIfRemote @skipIfDarwin + @skipIfWindows def test_dwos_loaded_table_output(self): self.build() exe = self.getBuildArtifact("a.out") @@ -97,6 +100,7 @@ def test_dwos_loaded_table_output(self): @skipIfRemote @skipIfDarwin + @skipIfWindows def test_dwos_not_loaded_table_output(self): self.build() exe = self.getBuildArtifact("a.out") From 2cea1babefbb726b00573c4cb5c89dc47664dc17 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 12 Oct 2023 16:59:04 -0700 Subject: [PATCH 035/720] [libc++] Remove libc++'s own header (#68806) It doesn't seem to do anything useful beyond what the C library header is doing, so there's no purpose in having one. 
--- libcxx/include/CMakeLists.txt | 1 - libcxx/include/__std_clang_module | 1 - libcxx/include/csetjmp | 8 ---- libcxx/include/module.modulemap.in | 5 +- libcxx/include/setjmp.h | 46 ------------------- .../depr.c.headers/setjmp_h.compile.pass.cpp | 9 ++-- .../support.runtime/csetjmp.pass.cpp | 30 +++++++----- libcxx/utils/data/ignore_format.txt | 3 -- 8 files changed, 25 insertions(+), 78 deletions(-) delete mode 100644 libcxx/include/setjmp.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 4d98b8eed1afd..9b03430a87d83 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -977,7 +977,6 @@ set(files scoped_allocator semaphore set - setjmp.h shared_mutex source_location span diff --git a/libcxx/include/__std_clang_module b/libcxx/include/__std_clang_module index 2644ea98b4907..e2e9e85ffc7d8 100644 --- a/libcxx/include/__std_clang_module +++ b/libcxx/include/__std_clang_module @@ -169,7 +169,6 @@ # include #endif #include -#include #if !defined(_LIBCPP_HAS_NO_THREADS) # include #endif diff --git a/libcxx/include/csetjmp b/libcxx/include/csetjmp index 4c64e8327e3f0..d219c8e6cb225 100644 --- a/libcxx/include/csetjmp +++ b/libcxx/include/csetjmp @@ -35,14 +35,6 @@ void longjmp(jmp_buf env, int val); #include -#ifndef _LIBCPP_SETJMP_H -# error tried including but didn't find libc++'s header. \ - This usually means that your header search paths are not configured properly. \ - The header search paths should contain the C++ Standard Library headers before \ - any C Standard Library, and you are probably using compiler flags that make that \ - not be the case. 
-#endif - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 09184af2732c8..3e5a8a391b6e6 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -456,10 +456,7 @@ module std_math_h [system] { header "math.h" export * } -module std_setjmp_h [system] { - header "setjmp.h" - export * -} +// provided by C library. // provided by C library. // FIXME: is missing. // provided by compiler. diff --git a/libcxx/include/setjmp.h b/libcxx/include/setjmp.h deleted file mode 100644 index f4a2bbcb0bd39..0000000000000 --- a/libcxx/include/setjmp.h +++ /dev/null @@ -1,46 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP_SETJMP_H -#define _LIBCPP_SETJMP_H - -/* - setjmp.h synopsis - -Macros: - - setjmp - -Types: - - jmp_buf - -void longjmp(jmp_buf env, int val); - -*/ - -#include <__config> - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -#if __has_include_next() -# include_next -#endif - -#ifdef __cplusplus - -#ifndef setjmp -#define setjmp(env) setjmp(env) -#endif - -#endif // __cplusplus - -#endif // _LIBCPP_SETJMP_H diff --git a/libcxx/test/std/depr/depr.c.headers/setjmp_h.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/setjmp_h.compile.pass.cpp index 7a49a85510202..eaaeecbeb70ec 100644 --- a/libcxx/test/std/depr/depr.c.headers/setjmp_h.compile.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/setjmp_h.compile.pass.cpp @@ -7,14 +7,15 @@ 
//===----------------------------------------------------------------------===// // test +// +// Even though is not provided by libc++, we still test that +// using it with libc++ on the search path will work. #include #include "test_macros.h" -#ifndef setjmp -#error setjmp not defined -#endif - jmp_buf jb; ASSERT_SAME_TYPE(void, decltype(longjmp(jb, 0))); + +void f() { setjmp(jb); } diff --git a/libcxx/test/std/language.support/support.runtime/csetjmp.pass.cpp b/libcxx/test/std/language.support/support.runtime/csetjmp.pass.cpp index 0e3d8f69e99c2..d6d32c371b9e5 100644 --- a/libcxx/test/std/language.support/support.runtime/csetjmp.pass.cpp +++ b/libcxx/test/std/language.support/support.runtime/csetjmp.pass.cpp @@ -9,20 +9,28 @@ // test #include +#include #include -#include "test_macros.h" +int main(int, char**) { + std::jmp_buf jb; -#ifndef setjmp -#error setjmp not defined -#endif + switch (setjmp(jb)) { + // First time we set the buffer, the function should return 0 + case 0: + break; -int main(int, char**) -{ - std::jmp_buf jb; - ((void)jb); // Prevent unused warning - static_assert((std::is_same::value), - "std::is_same::value"); + // If it returned 42, then we're coming from the std::longjmp call below + case 42: + return 0; - return 0; + // Otherwise, something is wrong + default: + return 1; + } + + std::longjmp(jb, 42); + static_assert(std::is_same::value, ""); + + return 1; } diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt index 34ab4004ece37..e4f56b00c7583 100644 --- a/libcxx/utils/data/ignore_format.txt +++ b/libcxx/utils/data/ignore_format.txt @@ -418,7 +418,6 @@ libcxx/include/regex libcxx/include/scoped_allocator libcxx/include/semaphore libcxx/include/set -libcxx/include/setjmp.h libcxx/include/span libcxx/include/__split_buffer libcxx/include/sstream @@ -2725,7 +2724,6 @@ libcxx/test/std/depr/depr.c.headers/inttypes_h.compile.pass.cpp libcxx/test/std/depr/depr.c.headers/limits_h.compile.pass.cpp 
libcxx/test/std/depr/depr.c.headers/locale_h.compile.pass.cpp libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp -libcxx/test/std/depr/depr.c.headers/setjmp_h.compile.pass.cpp libcxx/test/std/depr/depr.c.headers/signal_h.compile.pass.cpp libcxx/test/std/depr/depr.c.headers/stdarg_h.compile.pass.cpp libcxx/test/std/depr/depr.c.headers/stdbool_h.compile.pass.cpp @@ -3941,7 +3939,6 @@ libcxx/test/std/language.support/support.rtti/bad.typeid/bad_typeid.pass.cpp libcxx/test/std/language.support/support.rtti/type.info/type_info.equal.pass.cpp libcxx/test/std/language.support/support.rtti/type.info/type_info_hash.pass.cpp libcxx/test/std/language.support/support.rtti/type.info/type_info.pass.cpp -libcxx/test/std/language.support/support.runtime/csetjmp.pass.cpp libcxx/test/std/language.support/support.runtime/csignal.pass.cpp libcxx/test/std/language.support/support.runtime/cstdarg.pass.cpp libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp From 5bf701a6687a46fd898621f5077959ff202d716b Mon Sep 17 00:00:00 2001 From: hanhanW Date: Thu, 12 Oct 2023 17:09:14 -0700 Subject: [PATCH 036/720] Revert "[mlir][arith] Canonicalization patterns for `arith.select` (#67809)" This reverts commit 6668d14931c31d3dd80580930b4154e1eb1721b2. 
--- .../Dialect/Arith/IR/ArithCanonicalization.td | 44 ----------- mlir/lib/Dialect/Arith/IR/ArithOps.cpp | 4 +- mlir/test/Dialect/Arith/canonicalize.mlir | 76 ------------------- 3 files changed, 1 insertion(+), 123 deletions(-) diff --git a/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td b/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td index 9d38513215d3e..f3d84d0b261e8 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td +++ b/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td @@ -233,50 +233,6 @@ def CmpIExtUI : CPred<"$0.getValue() == arith::CmpIPredicate::eq || " "$0.getValue() == arith::CmpIPredicate::ne">> $pred)]>; -//===----------------------------------------------------------------------===// -// SelectOp -//===----------------------------------------------------------------------===// - -// select(not(pred), a, b) => select(pred, b, a) -def SelectNotCond : - Pat<(SelectOp (Arith_XOrIOp $pred, (ConstantLikeMatcher APIntAttr:$ones)), $a, $b), - (SelectOp $pred, $b, $a), - [(IsScalarOrSplatNegativeOne $ones)]>; - -// select(pred, select(pred, a, b), c) => select(pred, a, c) -def RedundantSelectTrue : - Pat<(SelectOp $pred, (SelectOp $pred, $a, $b), $c), - (SelectOp $pred, $a, $c)>; - -// select(pred, a, select(pred, b, c)) => select(pred, a, c) -def RedundantSelectFalse : - Pat<(SelectOp $pred, $a, (SelectOp $pred, $b, $c)), - (SelectOp $pred, $a, $c)>; - -// select(predA, select(predB, x, y), y) => select(and(predA, predB), x, y) -def SelectAndCond : - Pat<(SelectOp $predA, (SelectOp $predB, $x, $y), $y), - (SelectOp (Arith_AndIOp $predA, $predB), $x, $y)>; - -// select(predA, select(predB, y, x), y) => select(and(predA, not(predB)), x, y) -def SelectAndNotCond : - Pat<(SelectOp $predA, (SelectOp $predB, $y, $x), $y), - (SelectOp (Arith_AndIOp $predA, - (Arith_XOrIOp $predB, (Arith_ConstantOp ConstantAttr))), - $x, $y)>; - -// select(predA, x, select(predB, x, y)) => select(or(predA, predB), x, y) -def SelectOrCond : - Pat<(SelectOp 
$predA, $x, (SelectOp $predB, $x, $y)), - (SelectOp (Arith_OrIOp $predA, $predB), $x, $y)>; - -// select(predA, x, select(predB, y, x)) => select(or(predA, not(predB)), x, y) -def SelectOrNotCond : - Pat<(SelectOp $predA, $x, (SelectOp $predB, $y, $x)), - (SelectOp (Arith_OrIOp $predA, - (Arith_XOrIOp $predB, (Arith_ConstantOp ConstantAttr))), - $x, $y)>; - //===----------------------------------------------------------------------===// // IndexCastOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 0ecc288f3b077..ae8a6ef350ce1 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -2212,9 +2212,7 @@ struct SelectToExtUI : public OpRewritePattern { void arith::SelectOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + results.add(context); } OpFoldResult arith::SelectOp::fold(FoldAdaptor adaptor) { diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index 1b0547c9e8f80..f697f3d01458e 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -128,82 +128,6 @@ func.func @selToArith(%arg0: i1, %arg1 : i1, %arg2 : i1) -> i1 { return %res : i1 } -// CHECK-LABEL: @redundantSelectTrue -// CHECK-NEXT: %[[res:.+]] = arith.select %arg0, %arg1, %arg3 -// CHECK-NEXT: return %[[res]] -func.func @redundantSelectTrue(%arg0: i1, %arg1 : i32, %arg2 : i32, %arg3 : i32) -> i32 { - %0 = arith.select %arg0, %arg1, %arg2 : i32 - %res = arith.select %arg0, %0, %arg3 : i32 - return %res : i32 -} - -// CHECK-LABEL: @redundantSelectFalse -// CHECK-NEXT: %[[res:.+]] = arith.select %arg0, %arg3, %arg2 -// CHECK-NEXT: return %[[res]] -func.func @redundantSelectFalse(%arg0: i1, %arg1 : i32, %arg2 : i32, %arg3 : i32) -> i32 { - %0 = arith.select %arg0, %arg1, %arg2 : i32 - 
%res = arith.select %arg0, %arg3, %0 : i32 - return %res : i32 -} - -// CHECK-LABEL: @selNotCond -// CHECK-NEXT: %[[res1:.+]] = arith.select %arg0, %arg2, %arg1 -// CHECK-NEXT: %[[res2:.+]] = arith.select %arg0, %arg4, %arg3 -// CHECK-NEXT: return %[[res1]], %[[res2]] -func.func @selNotCond(%arg0: i1, %arg1 : i32, %arg2 : i32, %arg3 : i32, %arg4 : i32) -> (i32, i32) { - %one = arith.constant 1 : i1 - %cond1 = arith.xori %arg0, %one : i1 - %cond2 = arith.xori %one, %arg0 : i1 - - %res1 = arith.select %cond1, %arg1, %arg2 : i32 - %res2 = arith.select %cond2, %arg3, %arg4 : i32 - return %res1, %res2 : i32, i32 -} - -// CHECK-LABEL: @selAndCond -// CHECK-NEXT: %[[and:.+]] = arith.andi %arg1, %arg0 -// CHECK-NEXT: %[[res:.+]] = arith.select %[[and]], %arg2, %arg3 -// CHECK-NEXT: return %[[res]] -func.func @selAndCond(%arg0: i1, %arg1: i1, %arg2 : i32, %arg3 : i32) -> i32 { - %sel = arith.select %arg0, %arg2, %arg3 : i32 - %res = arith.select %arg1, %sel, %arg3 : i32 - return %res : i32 -} - -// CHECK-LABEL: @selAndNotCond -// CHECK-NEXT: %[[one:.+]] = arith.constant true -// CHECK-NEXT: %[[not:.+]] = arith.xori %arg0, %[[one]] -// CHECK-NEXT: %[[and:.+]] = arith.andi %arg1, %[[not]] -// CHECK-NEXT: %[[res:.+]] = arith.select %[[and]], %arg3, %arg2 -// CHECK-NEXT: return %[[res]] -func.func @selAndNotCond(%arg0: i1, %arg1: i1, %arg2 : i32, %arg3 : i32) -> i32 { - %sel = arith.select %arg0, %arg2, %arg3 : i32 - %res = arith.select %arg1, %sel, %arg2 : i32 - return %res : i32 -} - -// CHECK-LABEL: @selOrCond -// CHECK-NEXT: %[[or:.+]] = arith.ori %arg1, %arg0 -// CHECK-NEXT: %[[res:.+]] = arith.select %[[or]], %arg2, %arg3 -// CHECK-NEXT: return %[[res]] -func.func @selOrCond(%arg0: i1, %arg1: i1, %arg2 : i32, %arg3 : i32) -> i32 { - %sel = arith.select %arg0, %arg2, %arg3 : i32 - %res = arith.select %arg1, %arg2, %sel : i32 - return %res : i32 -} - -// CHECK-LABEL: @selOrNotCond -// CHECK-NEXT: %[[one:.+]] = arith.constant true -// CHECK-NEXT: %[[not:.+]] = arith.xori 
%arg0, %[[one]] -// CHECK-NEXT: %[[or:.+]] = arith.ori %arg1, %[[not]] -// CHECK-NEXT: %[[res:.+]] = arith.select %[[or]], %arg3, %arg2 -// CHECK-NEXT: return %[[res]] -func.func @selOrNotCond(%arg0: i1, %arg1: i1, %arg2 : i32, %arg3 : i32) -> i32 { - %sel = arith.select %arg0, %arg2, %arg3 : i32 - %res = arith.select %arg1, %arg3, %sel : i32 - return %res : i32 -} - // Test case: Folding of comparisons with equal operands. // CHECK-LABEL: @cmpi_equal_operands // CHECK-DAG: %[[T:.*]] = arith.constant true From fcb4c0555e2f8f77c335c386e299093329458209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Thu, 12 Oct 2023 17:40:16 -0700 Subject: [PATCH 037/720] [mlir][nfc] Rename type constraint for scalable vectors (#68808) For consistency with other predicates, rename: * allDimsScalableVectorTypePred -> IsVectorTypeWithAllDimsScalablePred * IsScalableVectorTypePred -> IsVectorTypeWithAnyDimScalablePred --- mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td | 2 +- mlir/include/mlir/IR/CommonTypeConstraints.td | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td index e09092268082d..049c9759d70bf 100644 --- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td +++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td @@ -27,7 +27,7 @@ include "mlir/Interfaces/InferTypeOpInterface.td" class SMETileType dims, string description> : ShapedContainerType<[datatype], - And<[IsVectorOfRankPred<[2]>, allDimsScalableVectorTypePred, + And<[IsVectorOfRankPred<[2]>, IsVectorTypeWithAllDimsScalablePred, IsVectorOfShape]>, description>; diff --git a/mlir/include/mlir/IR/CommonTypeConstraints.td b/mlir/include/mlir/IR/CommonTypeConstraints.td index c3f18965e343a..59249349921a3 100644 --- a/mlir/include/mlir/IR/CommonTypeConstraints.td +++ b/mlir/include/mlir/IR/CommonTypeConstraints.td @@ -34,8 +34,9 @@ def IsFixedVectorTypePred : 
CPred<[{::llvm::isa<::mlir::VectorType>($_self) && !::llvm::cast($_self).isScalable()}]>; // Whether a type is a scalable VectorType. -def IsScalableVectorTypePred : CPred<[{::llvm::isa<::mlir::VectorType>($_self) && - ::llvm::cast($_self).isScalable()}]>; +def IsVectorTypeWithAnyDimScalablePred + : CPred<[{::llvm::isa<::mlir::VectorType>($_self) && + ::llvm::cast($_self).isScalable()}]>; // Whether a type is a scalable VectorType, with a single trailing scalable dimension. // Examples: @@ -51,7 +52,7 @@ def IsVectorTypeWithOnlyTrailingDimScalablePred : And<[ ]>; // Whether a type is a VectorType and all dimensions are scalable. -def allDimsScalableVectorTypePred : And<[ +def IsVectorTypeWithAllDimsScalablePred : And<[ IsVectorTypePred, CPred<[{::llvm::cast<::mlir::VectorType>($_self).allDimsScalable()}]> ]>; @@ -414,7 +415,7 @@ class FixedVectorOf allowedTypes> : "fixed-length vector", "::mlir::VectorType">; class ScalableVectorOf allowedTypes> : - ShapedContainerType; // Any vector with a single trailing scalable dimension, with an element type in @@ -447,7 +448,7 @@ class IsFixedVectorOfRankPred allowedRanks> : // Whether the number of elements of a scalable vector is from the given // `allowedRanks` list class IsScalableVectorOfRankPred allowedRanks> : - And<[IsScalableVectorTypePred, + And<[IsVectorTypeWithAnyDimScalablePred, Or($_self).getRank() == }] @@ -497,7 +498,7 @@ class IsFixedVectorOfLengthPred allowedLengths> : // Whether the number of elements of a scalable vector is from the given // `allowedLengths` list class IsScalableVectorOfLengthPred allowedLengths> : - And<[IsScalableVectorTypePred, + And<[IsVectorTypeWithAnyDimScalablePred, Or($_self).getNumElements() == }] From fbe47bf532e83cd802bc452a0b7db9aef9fb2aad Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Thu, 12 Oct 2023 17:47:26 -0700 Subject: [PATCH 038/720] [mlir][sparse] remove dead code from utils (#68943) --- 
.../SparseTensor/Transforms/CodegenUtils.cpp | 107 ------------------ .../SparseTensor/Transforms/CodegenUtils.h | 28 ----- .../Transforms/SparseTensorConversion.cpp | 10 -- 3 files changed, 145 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index dac6d6b64551c..298ff09883556 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -23,54 +23,6 @@ using namespace mlir; using namespace mlir::sparse_tensor; -/// If the tensor is a sparse constant, generates and returns the pair of -/// the constants for the coordinates and the values. -static std::optional> -genSplitSparseConstant(OpBuilder &builder, Location loc, Value tensor) { - if (auto constOp = tensor.getDefiningOp()) { - if (auto a = dyn_cast(constOp.getValue())) { - auto coordinates = builder.create(loc, a.getIndices()); - auto values = builder.create(loc, a.getValues()); - return std::make_pair(coordinates, values); - } - } - return {}; -} - -/// Reads `coordinates[k][0..rank-1]` and `value[k]`, appending the -/// former onto `cvs` and returning the latter. -// FIXME: Change the `rank` argument to `Dimension dimRank` or `Level lvlRank`, -// to clarify its intended meaning. -static Value genCoordsAndValueForSparse(OpBuilder &builder, Location loc, - Value coordinates, Value values, - SmallVectorImpl &cvs, Value k, - unsigned rank) { - for (unsigned d = 0; d < rank; d++) { - Value dim = constantIndex(builder, loc, d); - Value crd = - builder.create(loc, coordinates, ValueRange{k, dim}); - crd = builder.create(loc, builder.getIndexType(), crd); - // builder.create(loc, crd, cvs, dim); - cvs.push_back(crd); - } - return builder.create(loc, values, k); -} - -/// Generates code to read the value from `tensor[ivs]`, and open -/// a conditional for whether the value is non-zero. 
The generated code -/// looks like the following and the insertion point after this routine -/// is inside the then-branch. -/// if (tensor[ivs] != 0) -/// insert_point -static Value genCoordsAndValueForDense(OpBuilder &builder, Location loc, - Value tensor, - SmallVectorImpl &cvs, - ValueRange ivs) { - Value val = genValueForDense(builder, loc, tensor, ivs); - cvs.append(ivs.begin(), ivs.end()); - return val; -} - //===----------------------------------------------------------------------===// // ExecutionEngine/SparseTensorUtils helper functions. //===----------------------------------------------------------------------===// @@ -450,65 +402,6 @@ void mlir::sparse_tensor::deallocDenseTensor(OpBuilder &builder, Location loc, builder.create(loc, buffer); } -Value mlir::sparse_tensor::genValueForDense(OpBuilder &builder, Location loc, - Value tensor, ValueRange ivs) { - Value val = builder.create(loc, tensor, ivs); - Value cond = genIsNonzero(builder, loc, val); - scf::IfOp ifOp = builder.create(loc, cond, /*else*/ false); - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - return val; -} - -// FIXME: -// 1. Dense tensors loop should be generated by loop emitter. -// 2. Support reduction variables to propagate SSA chains properly. -// 3. Change the `rank` argument to `Dimension dimRank` or `Level lvlRank`, -// to clarify its meaning. -void mlir::sparse_tensor::genDenseTensorOrSparseConstantIterLoop( - OpBuilder &builder, Location loc, Value src, unsigned rank, - function_ref bodyBuilder) { - // `cvs` is actually the flattened coordinates array for all elements, - // not just for one element (since we do not `SmallVector::clear` after - // each iteration of the body of the loopnest. 
- SmallVector cvs; - SmallVector lo; - SmallVector hi; - SmallVector st; - const Value zero = constantIndex(builder, loc, 0); - const Value one = constantIndex(builder, loc, 1); - const auto splitSrc = genSplitSparseConstant(builder, loc, src); - if (splitSrc.has_value()) { - const Value srcCoordinates = splitSrc->first; - const Value srcValues = splitSrc->second; - lo.push_back(zero); - hi.push_back(linalg::createOrFoldDimOp(builder, loc, srcValues, 0)); - st.push_back(one); - scf::buildLoopNest(builder, loc, lo, hi, st, {}, - [&](OpBuilder &builder, Location loc, ValueRange ivs, - ValueRange /*args*/) -> scf::ValueVector { - Value val = genCoordsAndValueForSparse( - builder, loc, srcCoordinates, srcValues, cvs, - ivs[0], rank); - bodyBuilder(builder, loc, val, cvs); - return {}; - }); - } else { - for (unsigned i = 0; i < rank; i++) { - lo.push_back(zero); - hi.push_back(linalg::createOrFoldDimOp(builder, loc, src, i)); - st.push_back(one); - } - scf::buildLoopNest(builder, loc, lo, hi, st, {}, - [&](OpBuilder &builder, Location loc, ValueRange ivs, - ValueRange /*args*/) -> scf::ValueVector { - Value val = genCoordsAndValueForDense(builder, loc, - src, cvs, ivs); - bodyBuilder(builder, loc, val, cvs); - return {}; - }); - } -} - void mlir::sparse_tensor::sizesFromSrc(OpBuilder &builder, SmallVectorImpl &sizes, Location loc, Value src) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 1562ea3f20f73..4673d24fc81f3 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -221,34 +221,6 @@ Value allocDenseTensor(OpBuilder &builder, Location loc, /// Generates code to deallocate a dense buffer. void deallocDenseTensor(OpBuilder &builder, Location loc, Value buffer); -/// Generates code to read the value from `tensor[ivs]`. 
The generated code -/// looks like the following and the insertion point after this routine is -/// inside the then-branch. -/// if (tensor[ivs] != 0) -/// insert_point -Value genValueForDense(OpBuilder &builder, Location loc, Value tensor, - ValueRange ivs); - -/// Generates the loop structure to iterate over a dense tensor or a sparse -/// tensor constant to support the lowering of dense-to-sparse convert operator. -// -// The loop to iterate a dense tensor: -// for i1 in dim1 -// .. -// for ik in dimk -// val = a[i1,..,ik] -// if val != 0 -// loop-body -// -// The loop to iterate a sparse tensor constant: -// for i in range(NNZ) -// val = values[i] -// [i1,..,ik] = coordinates[i] -// loop-body -void genDenseTensorOrSparseConstantIterLoop( - OpBuilder &builder, Location loc, Value src, unsigned rank, - function_ref bodyBuilder); - /// Populates given sizes array from dense tensor or sparse tensor constant. void sizesFromSrc(OpBuilder &builder, SmallVectorImpl &sizes, Location loc, Value src); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index 4c2d6be29c02f..8e2dbcf864f97 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -241,16 +241,6 @@ class NewCallParams final { return true; } - /// Gets the dimension-to-level mapping. - // - // TODO: This is only ever used for passing into `genAddEltCall`; - // is there a better way to encapsulate that pattern (both to avoid - // this one-off getter, and to avoid potential mixups)? - Value getDimToLvl() const { - assert(isInitialized() && "Must initialize before getDimToLvl"); - return params[kParamDim2Lvl]; - } - /// Generates a function call, with the current static parameters /// and the given dynamic arguments. 
Value genNewCall(Action action, Value ptr = Value()) { From a712244f3b76cd2ef60b4f3ce5efaf6d4d49c6fe Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Fri, 13 Oct 2023 08:51:11 +0800 Subject: [PATCH 039/720] [PowerPC][JITLink] Support R_PPC64_GOT_PCREL34 (#68658) `R_PPC64_GOT_PCREL34` is generated for pwr10+. --- llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h | 5 +++++ llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp | 6 ++++++ llvm/lib/ExecutionEngine/JITLink/ppc64.cpp | 2 ++ .../JITLink/ppc64/ELF_ppc64_relocations.s | 15 +++++++++++++++ 4 files changed, 28 insertions(+) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h index e55edf3082533..ff932f6022bdc 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h @@ -51,6 +51,7 @@ enum EdgeKind_ppc64 : Edge::Kind { TOCDelta16HI, TOCDelta16LO, TOCDelta16LODS, + RequestGOTAndTransformToDelta34, CallBranchDelta, // Need to restore r2 after the bl, suggesting the bl is followed by a nop. CallBranchDeltaRestoreTOC, @@ -170,6 +171,10 @@ class TOCTableManager : public TableManager> { // Create TOC section if TOC relocation, PLT or GOT is used. getOrCreateTOCSection(G); return false; + case RequestGOTAndTransformToDelta34: + E.setKind(ppc64::Delta34); + E.setTarget(createEntry(G, E.getTarget())); + return true; default: return false; } diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp index 8ede046e1636e..a095059496dc1 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp @@ -234,6 +234,9 @@ class ELFLinkGraphBuilder_ppc64 if (ELFReloc == ELF::R_PPC64_TLSLD) return make_error("Local-dynamic TLS model is not supported", inconvertibleErrorCode()); + if (ELFReloc == ELF::R_PPC64_PCREL_OPT) + // TODO: Support PCREL optimization, now ignore it. 
+ return Error::success(); auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec); if (!ObjSymbol) @@ -360,6 +363,9 @@ class ELFLinkGraphBuilder_ppc64 case ELF::R_PPC64_PCREL34: Kind = ppc64::Delta34; break; + case ELF::R_PPC64_GOT_PCREL34: + Kind = ppc64::RequestGOTAndTransformToDelta34; + break; case ELF::R_PPC64_GOT_TLSGD16_HA: Kind = ppc64::RequestTLSDescInGOTAndTransformToTOCDelta16HA; break; diff --git a/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp b/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp index b147ffc8dac21..ac4a62a503919 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp @@ -120,6 +120,8 @@ const char *getEdgeKindName(Edge::Kind K) { return "TOCDelta16LO"; case TOCDelta16LODS: return "TOCDelta16LODS"; + case RequestGOTAndTransformToDelta34: + return "RequestGOTAndTransformToDelta34"; case CallBranchDelta: return "CallBranchDelta"; case CallBranchDeltaRestoreTOC: diff --git a/llvm/test/ExecutionEngine/JITLink/ppc64/ELF_ppc64_relocations.s b/llvm/test/ExecutionEngine/JITLink/ppc64/ELF_ppc64_relocations.s index 7e39a20ef6ab8..bcee29d1d34f6 100644 --- a/llvm/test/ExecutionEngine/JITLink/ppc64/ELF_ppc64_relocations.s +++ b/llvm/test/ExecutionEngine/JITLink/ppc64/ELF_ppc64_relocations.s @@ -8,6 +8,7 @@ # RUN: --abs external_addr14_func=0x0880 \ # RUN: --abs external_addr16_data=0x6000 \ # RUN: --abs external_addr32_data=0x36668840 \ +# RUN: --abs pcrel_external_var=0x36668860 \ # RUN: --check %s %t/elf_reloc.o # RUN: llvm-mc --triple=powerpc64-unknown-linux-gnu --filetype=obj -o \ # RUN: %t/elf_reloc.o %s @@ -18,6 +19,7 @@ # RUN: --abs external_addr14_func=0x0880 \ # RUN: --abs external_addr16_data=0x6000 \ # RUN: --abs external_addr32_data=0x36668840 \ +# RUN: --abs pcrel_external_var=0x36668860 \ # RUN: --check %s %t/elf_reloc.o # jitlink-check: section_addr(elf_reloc.o, $__GOT) + 0x8000 = __TOC__ @@ -240,6 +242,19 @@ reloc_rel16: blr .size reloc_rel16, .-reloc_rel16 +# Check 
R_PPC64_GOT_PCREL34 +# jitlink-check: (got_addr(elf_reloc.o, pcrel_external_var) - reloc_got_pcrel34)[33:0] = \ +# jitlink-check: ((((*{4}(reloc_got_pcrel34)) & 0x3ffff) << 16) | ((*{4}(reloc_got_pcrel34 + 4)) & 0xffff))[33:0] + .global reloc_got_pcrel34 + .p2align 4 + .type reloc_got_pcrel34,@function +reloc_got_pcrel34: + pld 3,pcrel_external_var@got@pcrel(0),1 +.Lpcrel0: + .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) + blr + .size reloc_got_pcrel34,.-reloc_got_pcrel34 + .type .L.str,@object .section .rodata.str1.1,"aMS",@progbits,1 .L.str: From ebaf8d4949830fd4b0a2f6df7aae8eccd39042e4 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 12 Oct 2023 18:17:37 -0700 Subject: [PATCH 040/720] [mlir][affine] ValueBoundsConstraintSet: Fully compose affine.apply (#68899) Fully compose `affine.apply` ops before adding them to the underlying `FlatLinearConstraints`. This works around a limitation of `FlatLinearConstraints`, which cannot deduce a constant bound if it involves two identical local variables. Details for future improvements of `FlatLinearConstraints`: The constraint set infrastructure fails to compute a constant bound of -8 for the first variable. 
``` Domain: 0, Range: 1, Symbols: 4, Locals: 2 8 constraints (None None None None None Local Local const) 1 -1 0 0 0 0 0 0 = 0 0 1 -1 1 0 0 0 0 = 0 0 0 1 0 0 0 -16 0 = 0 0 0 0 1 0 -16 0 -8 = 0 0 0 0 0 -1 0 32 31 >= 0 0 0 0 0 1 0 -32 0 >= 0 0 0 0 0 -1 32 0 31 >= 0 0 0 0 0 1 -32 0 0 >= 0 ``` --- .../Affine/IR/ValueBoundsOpInterfaceImpl.h | 14 ++++++ .../Affine/IR/ValueBoundsOpInterfaceImpl.cpp | 47 +++++++++++++++++-- .../value-bounds-op-interface-impl.mlir | 32 +++++++++++++ .../Dialect/Affine/TestReifyValueBounds.cpp | 11 ++++- 4 files changed, 97 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h index 2abbabc5bb286..5d4774861bdfd 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h +++ b/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h @@ -9,11 +9,25 @@ #ifndef MLIR_DIALECT_AFFINE_IR_VALUEBOUNDSOPINTERFACEIMPL_H #define MLIR_DIALECT_AFFINE_IR_VALUEBOUNDSOPINTERFACEIMPL_H +#include "mlir/Support/LogicalResult.h" + namespace mlir { class DialectRegistry; +class Value; namespace affine { void registerValueBoundsOpInterfaceExternalModels(DialectRegistry ®istry); + +/// Compute whether the given values are equal. Return "failure" if equality +/// could not be determined. `value1`/`value2` must be index-typed. +/// +/// This function is similar to `ValueBoundsConstraintSet::areEqual`. To work +/// around limitations in `FlatLinearConstraints`, this function fully composes +/// `value1` and `value2` (if they are the result of affine.apply ops) before +/// populating the constraint set. The folding/composing logic can see +/// opportunities for simplifications that the constraint set implementation +/// cannot see. 
+FailureOr fullyComposeAndCheckIfEqual(Value value1, Value value2); } // namespace affine } // namespace mlir diff --git a/mlir/lib/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.cpp index 97dd70e4f1d2b..d47c8eb8ccb42 100644 --- a/mlir/lib/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.cpp @@ -27,12 +27,22 @@ struct AffineApplyOpInterface assert(applyOp.getAffineMap().getNumResults() == 1 && "expected single result"); + // Fully compose this affine.apply with other ops because the folding logic + // can see opportunities for simplifying the affine map that + // `FlatLinearConstraints` can currently not see. + AffineMap map = applyOp.getAffineMap(); + SmallVector operands = llvm::to_vector(applyOp.getOperands()); + fullyComposeAffineMapAndOperands(&map, &operands); + // Align affine map result with dims/symbols in the constraint set. - AffineExpr expr = applyOp.getAffineMap().getResult(0); - SmallVector dimReplacements = llvm::to_vector(llvm::map_range( - applyOp.getDimOperands(), [&](Value v) { return cstr.getExpr(v); })); - SmallVector symReplacements = llvm::to_vector(llvm::map_range( - applyOp.getSymbolOperands(), [&](Value v) { return cstr.getExpr(v); })); + AffineExpr expr = map.getResult(0); + SmallVector dimReplacements, symReplacements; + for (int64_t i = 0, e = map.getNumDims(); i < e; ++i) + dimReplacements.push_back(cstr.getExpr(operands[i])); + for (int64_t i = map.getNumDims(), + e = map.getNumDims() + map.getNumSymbols(); + i < e; ++i) + symReplacements.push_back(cstr.getExpr(operands[i])); AffineExpr bound = expr.replaceDimsAndSymbols(dimReplacements, symReplacements); cstr.bound(value) == bound; @@ -92,3 +102,30 @@ void mlir::affine::registerValueBoundsOpInterfaceExternalModels( AffineMinOp::attachInterface(*ctx); }); } + +FailureOr mlir::affine::fullyComposeAndCheckIfEqual(Value value1, + Value value2) { + 
assert(value1.getType().isIndex() && "expected index type"); + assert(value2.getType().isIndex() && "expected index type"); + + // Subtract the two values/dimensions from each other. If the result is 0, + // both are equal. + Builder b(value1.getContext()); + AffineMap map = AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, + b.getAffineDimExpr(0) - b.getAffineDimExpr(1)); + // Fully compose the affine map with other ops because the folding logic + // can see opportunities for simplifying the affine map that + // `FlatLinearConstraints` can currently not see. + SmallVector mapOperands; + mapOperands.push_back(value1); + mapOperands.push_back(value2); + affine::fullyComposeAffineMapAndOperands(&map, &mapOperands); + ValueDimList valueDims; + for (Value v : mapOperands) + valueDims.push_back({v, std::nullopt}); + FailureOr bound = ValueBoundsConstraintSet::computeConstantBound( + presburger::BoundType::EQ, map, valueDims); + if (failed(bound)) + return failure(); + return *bound == 0; +} diff --git a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir index 338c48c5b210b..8acf358c887a9 100644 --- a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir @@ -58,3 +58,35 @@ func.func @affine_min_lb(%a: index) -> (index) { %2 = "test.reify_bound"(%1) {type = "LB"}: (index) -> (index) return %2 : index } + +// ----- + +// CHECK-LABEL: func @composed_affine_apply( +// CHECK: %[[cst:.*]] = arith.constant -8 : index +// CHECK: return %[[cst]] +func.func @composed_affine_apply(%i1 : index) -> (index) { + // The ValueBoundsOpInterface implementation of affine.apply fully composes + // the affine map (and its operands) with other affine.apply ops drawn from + // its operands before adding it to the constraint set. 
This is to work + // around a limitation in `FlatLinearConstraints`, which can currently not + // compute a constant bound for %s. (The affine map simplification logic can + // simplify %s to -8.) + %i2 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 16)>(%i1) + %i3 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 16 + 8)>(%i1) + %s = affine.apply affine_map<()[s0, s1] -> (s0 - s1)>()[%i2, %i3] + %reified = "test.reify_constant_bound"(%s) {type = "EQ"} : (index) -> (index) + return %reified : index +} + + +// ----- + +// Test for affine::fullyComposeAndCheckIfEqual +func.func @composed_are_equal(%i1 : index) { + %i2 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 16)>(%i1) + %i3 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 16 + 8)>(%i1) + %s = affine.apply affine_map<()[s0, s1] -> (s0 - s1)>()[%i2, %i3] + // expected-remark @below{{different}} + "test.are_equal"(%i2, %i3) {compose} : (index, index) -> () + return +} diff --git a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp index ad017cef1b9ba..6e3c3dff759a2 100644 --- a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp +++ b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h" #include "mlir/Dialect/Affine/Transforms/Transforms.h" #include "mlir/Dialect/Arith/Transforms/Transforms.h" #include "mlir/Dialect/Func/IR/FuncOps.h" @@ -186,8 +187,14 @@ static LogicalResult testEquality(func::FuncOp funcOp) { op->emitOpError("invalid op"); return WalkResult::skip(); } - FailureOr equal = ValueBoundsConstraintSet::areEqual( - op->getOperand(0), op->getOperand(1)); + FailureOr equal = failure(); + if (op->hasAttr("compose")) { + equal = affine::fullyComposeAndCheckIfEqual(op->getOperand(0), + op->getOperand(1)); + } else { + 
equal = ValueBoundsConstraintSet::areEqual(op->getOperand(0), + op->getOperand(1)); + } if (failed(equal)) { op->emitError("could not determine equality"); } else if (*equal) { From 127cf4ead3f8e33ae0955a4420eab9aad29b63d3 Mon Sep 17 00:00:00 2001 From: zhongyunde 00443407 Date: Fri, 6 Oct 2023 21:54:36 -0400 Subject: [PATCH 041/720] [SVE][InstCombine] Precommit tests for select + ptrue --- .../InstCombine/AArch64/sve-intrinsic-sel.ll | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll index b0f059c9de605..0d0c3b9892758 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll @@ -12,6 +12,18 @@ define @replace_sel_intrinsic( %p, %1 } +define @sel_ptrue( %a, %b) { +; CHECK-LABEL: @sel_ptrue( +; CHECK-NEXT: [[PRED:%.*]] = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[RES:%.*]] = select [[PRED]], [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret [[RES]] +; + %pred = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %res = call @llvm.aarch64.sve.sel.nxv4i32( %pred, %a, %b) + ret %res +} + +declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) declare @llvm.aarch64.sve.sel.nxv4i32(, , ) attributes #0 = { "target-features"="+sve" } From bf90ffb9b4617297053ce7228474e224922f2391 Mon Sep 17 00:00:00 2001 From: zhongyunde 00443407 Date: Wed, 27 Sep 2023 22:42:43 -0400 Subject: [PATCH 042/720] [SVE][InstCombine] Delete redundante sel instructions with ptrue svsel(pture, x, y) => x. 
depend on D121792 Reviewed By: paulwalker-arm, david-arm --- .../AArch64/AArch64TargetTransformInfo.cpp | 41 +++++++++++-------- .../InstCombine/AArch64/sve-intrinsic-sel.ll | 4 +- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index cded28054f592..d8a0e68d71237 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -798,10 +798,31 @@ instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II) { return IC.replaceInstUsesWith(II, EarliestReplacement); } +static bool isAllActivePredicate(Value *Pred) { + // Look through convert.from.svbool(convert.to.svbool(...) chain. + Value *UncastedPred; + if (match(Pred, m_Intrinsic( + m_Intrinsic( + m_Value(UncastedPred))))) + // If the predicate has the same or less lanes than the uncasted + // predicate then we know the casting has no effect. + if (cast(Pred->getType())->getMinNumElements() <= + cast(UncastedPred->getType())->getMinNumElements()) + Pred = UncastedPred; + + return match(Pred, m_Intrinsic( + m_ConstantInt())); +} + static std::optional instCombineSVESel(InstCombiner &IC, IntrinsicInst &II) { - auto Select = IC.Builder.CreateSelect(II.getOperand(0), II.getOperand(1), - II.getOperand(2)); + // svsel(ptrue, x, y) => x + auto *OpPredicate = II.getOperand(0); + if (isAllActivePredicate(OpPredicate)) + return IC.replaceInstUsesWith(II, II.getOperand(1)); + + auto Select = + IC.Builder.CreateSelect(OpPredicate, II.getOperand(1), II.getOperand(2)); return IC.replaceInstUsesWith(II, Select); } @@ -1200,22 +1221,6 @@ instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II, return IC.replaceInstUsesWith(II, Res); } -static bool isAllActivePredicate(Value *Pred) { - // Look through convert.from.svbool(convert.to.svbool(...) chain. 
- Value *UncastedPred; - if (match(Pred, m_Intrinsic( - m_Intrinsic( - m_Value(UncastedPred))))) - // If the predicate has the same or less lanes than the uncasted - // predicate then we know the casting has no effect. - if (cast(Pred->getType())->getMinNumElements() <= - cast(UncastedPred->getType())->getMinNumElements()) - Pred = UncastedPred; - - return match(Pred, m_Intrinsic( - m_ConstantInt())); -} - static std::optional instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) { Value *Pred = II.getOperand(0); diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll index 0d0c3b9892758..c6f08ce828826 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll @@ -14,9 +14,7 @@ define @replace_sel_intrinsic( %p, @sel_ptrue( %a, %b) { ; CHECK-LABEL: @sel_ptrue( -; CHECK-NEXT: [[PRED:%.*]] = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[RES:%.*]] = select [[PRED]], [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: ret [[RES]] +; CHECK-NEXT: ret [[A:%.*]] ; %pred = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %res = call @llvm.aarch64.sve.sel.nxv4i32( %pred, %a, %b) From 3104681686b17ea3c611e84b30884a25b84f87b6 Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Fri, 13 Oct 2023 10:59:27 +0800 Subject: [PATCH 043/720] [PowerPC][Atomics] Remove redundant block to clear reservation (#68430) This PR is following what https://reviews.llvm.org/D134783 does for quardword CAS. 
--- .../PowerPC/PPCExpandAtomicPseudoInsts.cpp | 16 +- llvm/test/CodeGen/PowerPC/atomics-i128.ll | 174 +++++++++++++----- 2 files changed, 132 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp index a9794ddd05667..aee57a5075ff7 100644 --- a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp @@ -239,23 +239,18 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( // loop: // old = lqarx ptr // - // bne 0, fail + // bne 0, exit // succ: // stqcx new ptr // bne 0, loop - // b exit - // fail: - // stqcx old ptr // exit: // .... MachineFunction::iterator MFI = ++MBB.getIterator(); MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB); - MachineBasicBlock *CmpFailMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); MF->insert(MFI, LoopCmpMBB); MF->insert(MFI, CmpSuccMBB); - MF->insert(MFI, CmpFailMBB); MF->insert(MFI, ExitMBB); ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), MBB.end()); @@ -276,9 +271,9 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE) .addReg(PPC::CR0) - .addMBB(CmpFailMBB); + .addMBB(ExitMBB); CurrentMBB->addSuccessor(CmpSuccMBB); - CurrentMBB->addSuccessor(CmpFailMBB); + CurrentMBB->addSuccessor(ExitMBB); // Build succ. 
CurrentMBB = CmpSuccMBB; PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, @@ -288,16 +283,11 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( .addImm(PPC::PRED_NE) .addReg(PPC::CR0) .addMBB(LoopCmpMBB); - BuildMI(CurrentMBB, DL, TII->get(PPC::B)).addMBB(ExitMBB); CurrentMBB->addSuccessor(LoopCmpMBB); CurrentMBB->addSuccessor(ExitMBB); - CurrentMBB = CmpFailMBB; - BuildMI(CurrentMBB, DL, SC).addReg(Old).addReg(RA).addReg(RB); - CurrentMBB->addSuccessor(ExitMBB); recomputeLiveIns(*LoopCmpMBB); recomputeLiveIns(*CmpSuccMBB); - recomputeLiveIns(*CmpFailMBB); recomputeLiveIns(*ExitMBB); NMBBI = MBB.end(); MI.eraseFromParent(); diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll index 66d727caed69f..f5422a9b7b542 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll @@ -986,10 +986,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) { ; CHECK-NEXT: mr r10, r6 ; CHECK-NEXT: stqcx. r10, 0, r3 ; CHECK-NEXT: bne cr0, .LBB7_1 -; CHECK-NEXT: b .LBB7_4 ; CHECK-NEXT: .LBB7_3: # %entry -; CHECK-NEXT: stqcx. r8, 0, r3 -; CHECK-NEXT: .LBB7_4: # %entry ; CHECK-NEXT: lwsync ; CHECK-NEXT: mr r3, r8 ; CHECK-NEXT: mr r4, r9 @@ -1033,10 +1030,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) { ; LE-PWR8-NEXT: mr r10, r7 ; LE-PWR8-NEXT: stqcx. r10, 0, r3 ; LE-PWR8-NEXT: bne cr0, .LBB7_1 -; LE-PWR8-NEXT: b .LBB7_4 ; LE-PWR8-NEXT: .LBB7_3: # %entry -; LE-PWR8-NEXT: stqcx. r8, 0, r3 -; LE-PWR8-NEXT: .LBB7_4: # %entry ; LE-PWR8-NEXT: lwsync ; LE-PWR8-NEXT: mr r3, r9 ; LE-PWR8-NEXT: mr r4, r8 @@ -1057,10 +1051,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) { ; AIX64-PWR8-NEXT: mr r10, r6 ; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 ; AIX64-PWR8-NEXT: bne cr0, L..BB7_1 -; AIX64-PWR8-NEXT: b L..BB7_4 ; AIX64-PWR8-NEXT: L..BB7_3: # %entry -; AIX64-PWR8-NEXT: stqcx. 
r8, 0, r3 -; AIX64-PWR8-NEXT: L..BB7_4: # %entry ; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: mr r3, r8 ; AIX64-PWR8-NEXT: mr r4, r9 @@ -1121,10 +1112,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) { ; CHECK-NEXT: mr r10, r6 ; CHECK-NEXT: stqcx. r10, 0, r3 ; CHECK-NEXT: bne cr0, .LBB8_1 -; CHECK-NEXT: b .LBB8_4 ; CHECK-NEXT: .LBB8_3: # %entry -; CHECK-NEXT: stqcx. r8, 0, r3 -; CHECK-NEXT: .LBB8_4: # %entry ; CHECK-NEXT: mr r3, r8 ; CHECK-NEXT: mr r4, r9 ; CHECK-NEXT: blr @@ -1168,10 +1156,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) { ; LE-PWR8-NEXT: mr r10, r7 ; LE-PWR8-NEXT: stqcx. r10, 0, r3 ; LE-PWR8-NEXT: bne cr0, .LBB8_1 -; LE-PWR8-NEXT: b .LBB8_4 ; LE-PWR8-NEXT: .LBB8_3: # %entry -; LE-PWR8-NEXT: stqcx. r8, 0, r3 -; LE-PWR8-NEXT: .LBB8_4: # %entry ; LE-PWR8-NEXT: mr r3, r9 ; LE-PWR8-NEXT: mr r4, r8 ; LE-PWR8-NEXT: blr @@ -1192,10 +1177,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) { ; AIX64-PWR8-NEXT: mr r10, r6 ; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 ; AIX64-PWR8-NEXT: bne cr0, L..BB8_1 -; AIX64-PWR8-NEXT: b L..BB8_4 ; AIX64-PWR8-NEXT: L..BB8_3: # %entry -; AIX64-PWR8-NEXT: stqcx. r8, 0, r3 -; AIX64-PWR8-NEXT: L..BB8_4: # %entry ; AIX64-PWR8-NEXT: mr r3, r8 ; AIX64-PWR8-NEXT: mr r4, r9 ; AIX64-PWR8-NEXT: blr @@ -1255,10 +1237,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) { ; CHECK-NEXT: mr r10, r6 ; CHECK-NEXT: stqcx. r10, 0, r3 ; CHECK-NEXT: bne cr0, .LBB9_1 -; CHECK-NEXT: b .LBB9_4 ; CHECK-NEXT: .LBB9_3: # %entry -; CHECK-NEXT: stqcx. r8, 0, r3 -; CHECK-NEXT: .LBB9_4: # %entry ; CHECK-NEXT: lwsync ; CHECK-NEXT: mr r3, r8 ; CHECK-NEXT: mr r4, r9 @@ -1303,10 +1282,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) { ; LE-PWR8-NEXT: mr r10, r7 ; LE-PWR8-NEXT: stqcx. r10, 0, r3 ; LE-PWR8-NEXT: bne cr0, .LBB9_1 -; LE-PWR8-NEXT: b .LBB9_4 ; LE-PWR8-NEXT: .LBB9_3: # %entry -; LE-PWR8-NEXT: stqcx. 
r8, 0, r3 -; LE-PWR8-NEXT: .LBB9_4: # %entry ; LE-PWR8-NEXT: lwsync ; LE-PWR8-NEXT: mr r3, r9 ; LE-PWR8-NEXT: mr r4, r8 @@ -1328,10 +1304,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) { ; AIX64-PWR8-NEXT: mr r10, r6 ; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 ; AIX64-PWR8-NEXT: bne cr0, L..BB9_1 -; AIX64-PWR8-NEXT: b L..BB9_4 ; AIX64-PWR8-NEXT: L..BB9_3: # %entry -; AIX64-PWR8-NEXT: stqcx. r8, 0, r3 -; AIX64-PWR8-NEXT: L..BB9_4: # %entry ; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: mr r3, r8 ; AIX64-PWR8-NEXT: mr r4, r9 @@ -1392,10 +1365,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) { ; CHECK-NEXT: mr r10, r6 ; CHECK-NEXT: stqcx. r10, 0, r3 ; CHECK-NEXT: bne cr0, .LBB10_1 -; CHECK-NEXT: b .LBB10_4 ; CHECK-NEXT: .LBB10_3: # %entry -; CHECK-NEXT: stqcx. r8, 0, r3 -; CHECK-NEXT: .LBB10_4: # %entry ; CHECK-NEXT: lwsync ; CHECK-NEXT: mr r3, r8 ; CHECK-NEXT: mr r4, r9 @@ -1440,10 +1410,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) { ; LE-PWR8-NEXT: mr r10, r7 ; LE-PWR8-NEXT: stqcx. r10, 0, r3 ; LE-PWR8-NEXT: bne cr0, .LBB10_1 -; LE-PWR8-NEXT: b .LBB10_4 ; LE-PWR8-NEXT: .LBB10_3: # %entry -; LE-PWR8-NEXT: stqcx. r8, 0, r3 -; LE-PWR8-NEXT: .LBB10_4: # %entry ; LE-PWR8-NEXT: lwsync ; LE-PWR8-NEXT: mr r3, r9 ; LE-PWR8-NEXT: mr r4, r8 @@ -1465,10 +1432,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) { ; AIX64-PWR8-NEXT: mr r10, r6 ; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 ; AIX64-PWR8-NEXT: bne cr0, L..BB10_1 -; AIX64-PWR8-NEXT: b L..BB10_4 ; AIX64-PWR8-NEXT: L..BB10_3: # %entry -; AIX64-PWR8-NEXT: stqcx. r8, 0, r3 -; AIX64-PWR8-NEXT: L..BB10_4: # %entry ; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: mr r3, r8 ; AIX64-PWR8-NEXT: mr r4, r9 @@ -1529,10 +1493,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) { ; CHECK-NEXT: mr r10, r6 ; CHECK-NEXT: stqcx. r10, 0, r3 ; CHECK-NEXT: bne cr0, .LBB11_1 -; CHECK-NEXT: b .LBB11_4 ; CHECK-NEXT: .LBB11_3: # %entry -; CHECK-NEXT: stqcx. 
r8, 0, r3 -; CHECK-NEXT: .LBB11_4: # %entry ; CHECK-NEXT: lwsync ; CHECK-NEXT: xor r3, r4, r8 ; CHECK-NEXT: xor r4, r5, r9 @@ -1578,10 +1539,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) { ; LE-PWR8-NEXT: mr r10, r7 ; LE-PWR8-NEXT: stqcx. r10, 0, r3 ; LE-PWR8-NEXT: bne cr0, .LBB11_1 -; LE-PWR8-NEXT: b .LBB11_4 ; LE-PWR8-NEXT: .LBB11_3: # %entry -; LE-PWR8-NEXT: stqcx. r8, 0, r3 -; LE-PWR8-NEXT: .LBB11_4: # %entry ; LE-PWR8-NEXT: lwsync ; LE-PWR8-NEXT: xor r3, r5, r8 ; LE-PWR8-NEXT: xor r4, r4, r9 @@ -1606,10 +1564,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) { ; AIX64-PWR8-NEXT: mr r10, r6 ; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 ; AIX64-PWR8-NEXT: bne cr0, L..BB11_1 -; AIX64-PWR8-NEXT: b L..BB11_4 ; AIX64-PWR8-NEXT: L..BB11_3: # %entry -; AIX64-PWR8-NEXT: stqcx. r8, 0, r3 -; AIX64-PWR8-NEXT: L..BB11_4: # %entry ; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: xor r3, r4, r8 ; AIX64-PWR8-NEXT: xor r4, r5, r9 @@ -1651,3 +1606,132 @@ entry: %1 = extractvalue { i128, i1 } %0, 1 ret i1 %1 } + +;; TODO: Optimize CAS at exit block when bool value is returned. +define i1 @bool_cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) { +; CHECK-LABEL: bool_cas_weak_acquire_acquire: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: .LBB12_1: # %entry +; CHECK-NEXT: # +; CHECK-NEXT: lqarx r8, 0, r3 +; CHECK-NEXT: xor r11, r9, r5 +; CHECK-NEXT: xor r10, r8, r4 +; CHECK-NEXT: or. r11, r11, r10 +; CHECK-NEXT: bne cr0, .LBB12_3 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: # +; CHECK-NEXT: mr r11, r7 +; CHECK-NEXT: mr r10, r6 +; CHECK-NEXT: stqcx. 
r10, 0, r3 +; CHECK-NEXT: bne cr0, .LBB12_1 +; CHECK-NEXT: .LBB12_3: # %entry +; CHECK-NEXT: lwsync +; CHECK-NEXT: xor r3, r4, r8 +; CHECK-NEXT: xor r4, r5, r9 +; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: cntlzd r3, r3 +; CHECK-NEXT: rldicl r3, r3, 58, 63 +; CHECK-NEXT: blr +; +; PWR7-LABEL: bool_cas_weak_acquire_acquire: +; PWR7: # %bb.0: # %entry +; PWR7-NEXT: mflr r0 +; PWR7-NEXT: stdu r1, -128(r1) +; PWR7-NEXT: std r0, 144(r1) +; PWR7-NEXT: .cfi_def_cfa_offset 128 +; PWR7-NEXT: .cfi_offset lr, 16 +; PWR7-NEXT: std r5, 120(r1) +; PWR7-NEXT: std r4, 112(r1) +; PWR7-NEXT: addi r4, r1, 112 +; PWR7-NEXT: mr r5, r6 +; PWR7-NEXT: mr r6, r7 +; PWR7-NEXT: li r7, 2 +; PWR7-NEXT: li r8, 2 +; PWR7-NEXT: bl __atomic_compare_exchange_16 +; PWR7-NEXT: nop +; PWR7-NEXT: addi r1, r1, 128 +; PWR7-NEXT: ld r0, 16(r1) +; PWR7-NEXT: mtlr r0 +; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: bool_cas_weak_acquire_acquire: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: .LBB12_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r8, 0, r3 +; LE-PWR8-NEXT: xor r11, r9, r4 +; LE-PWR8-NEXT: xor r10, r8, r5 +; LE-PWR8-NEXT: or. r11, r11, r10 +; LE-PWR8-NEXT: bne cr0, .LBB12_3 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: mr r11, r6 +; LE-PWR8-NEXT: mr r10, r7 +; LE-PWR8-NEXT: stqcx. r10, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB12_1 +; LE-PWR8-NEXT: .LBB12_3: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: xor r3, r5, r8 +; LE-PWR8-NEXT: xor r4, r4, r9 +; LE-PWR8-NEXT: or r3, r4, r3 +; LE-PWR8-NEXT: cntlzd r3, r3 +; LE-PWR8-NEXT: rldicl r3, r3, 58, 63 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: bool_cas_weak_acquire_acquire: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: L..BB12_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r8, 0, r3 +; AIX64-PWR8-NEXT: xor r11, r9, r5 +; AIX64-PWR8-NEXT: xor r10, r8, r4 +; AIX64-PWR8-NEXT: or. 
r11, r11, r10 +; AIX64-PWR8-NEXT: bne cr0, L..BB12_3 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: mr r11, r7 +; AIX64-PWR8-NEXT: mr r10, r6 +; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB12_1 +; AIX64-PWR8-NEXT: L..BB12_3: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: xor r3, r4, r8 +; AIX64-PWR8-NEXT: xor r4, r5, r9 +; AIX64-PWR8-NEXT: or r3, r4, r3 +; AIX64-PWR8-NEXT: cntlzd r3, r3 +; AIX64-PWR8-NEXT: rldicl r3, r3, 58, 63 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: bool_cas_weak_acquire_acquire: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stwu r1, -48(r1) +; PPC-PWR8-NEXT: stw r0, 52(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: mr r4, r3 +; PPC-PWR8-NEXT: lwz r3, 60(r1) +; PPC-PWR8-NEXT: stw r8, 44(r1) +; PPC-PWR8-NEXT: stw r7, 40(r1) +; PPC-PWR8-NEXT: stw r6, 36(r1) +; PPC-PWR8-NEXT: stw r5, 32(r1) +; PPC-PWR8-NEXT: addi r5, r1, 32 +; PPC-PWR8-NEXT: addi r6, r1, 16 +; PPC-PWR8-NEXT: li r7, 2 +; PPC-PWR8-NEXT: li r8, 2 +; PPC-PWR8-NEXT: stw r10, 20(r1) +; PPC-PWR8-NEXT: stw r9, 16(r1) +; PPC-PWR8-NEXT: stw r3, 28(r1) +; PPC-PWR8-NEXT: lwz r3, 56(r1) +; PPC-PWR8-NEXT: stw r3, 24(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: lwz r0, 52(r1) +; PPC-PWR8-NEXT: addi r1, r1, 48 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr +entry: + %0 = cmpxchg weak ptr %a, i128 %cmp, i128 %new acquire acquire + %1 = extractvalue { i128, i1 } %0, 1 + ret i1 %1 +} From b29fb9c9f4ae16233df10d104724d608aa7bdc3a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 12 Oct 2023 20:25:43 -0700 Subject: [PATCH 044/720] [llvm] Remove "using namespace llvm;" from header files (NFC) --- llvm/include/llvm/ADT/GenericUniformityImpl.h | 2 -- llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 2 -- llvm/include/llvm/Transforms/Instrumentation/CFGMST.h | 2 -- 3 files changed, 6 
deletions(-) diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h index ddd0746ccd916..b7d0a1228ebfc 100644 --- a/llvm/include/llvm/ADT/GenericUniformityImpl.h +++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h @@ -49,8 +49,6 @@ #define DEBUG_TYPE "uniformity" -using namespace llvm; - namespace llvm { template auto unique(Range &&R) { diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index 59378bc10873e..50f9aae73dc53 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -84,8 +84,6 @@ #include "llvm/Transforms/Utils/SCCPSolver.h" #include "llvm/Transforms/Utils/SizeOpts.h" -using namespace llvm; - namespace llvm { // Map of potential specializations for each function. The FunctionSpecializer // keeps the discovered specialisation opportunities for the module in a single diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h index 269441db7a558..6ed8a6c6eaf01 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h +++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h @@ -28,8 +28,6 @@ #define DEBUG_TYPE "cfgmst" -using namespace llvm; - namespace llvm { /// An union-find based Minimum Spanning Tree for CFG From 797b76791df4dbfc45f3002d2b9d58029495a63d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 12 Oct 2023 20:43:03 -0700 Subject: [PATCH 045/720] [IR] Move isConvergenceControlIntrinsic under "namespace llvm" (NFC) While I am at it, this patch removes "using namespace llvm;". 
--- llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h b/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h index 2ba81015cb7b6..e2ece30b18641 100644 --- a/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h +++ b/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h @@ -31,8 +31,6 @@ #include "llvm/ADT/Twine.h" #include "llvm/IR/Intrinsics.h" -using namespace llvm; - #define Check(C, ...) \ do { \ if (!(C)) { \ @@ -49,6 +47,7 @@ using namespace llvm; } \ } while (false) +namespace llvm { static bool isConvergenceControlIntrinsic(unsigned IntrinsicID) { switch (IntrinsicID) { default: @@ -60,7 +59,6 @@ static bool isConvergenceControlIntrinsic(unsigned IntrinsicID) { } } -namespace llvm { template void GenericConvergenceVerifier::clear() { Tokens.clear(); CI.clear(); From c40902c41c007ae42ab9a1e80008d81ec4eec24f Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 12 Oct 2023 21:02:02 -0700 Subject: [PATCH 046/720] [AMDGPU] Use llvm::endianness::little (NFC) Note that llvm::support::endianness has been renamed to llvm::endianness. This patch replaces support::endianness::little with llvm::endianness::little. 
--- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp index d93f747bf6f0a..88c1668f62800 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -414,7 +414,7 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI, if (Desc.operands()[i].OperandType == AMDGPU::OPERAND_REG_IMM_FP64) Imm = Hi_32(Imm); - support::endian::write(CB, Imm, support::endianness::little); + support::endian::write(CB, Imm, llvm::endianness::little); // Only one literal value allowed break; From 2045cca0c3d27f046c96257abfa11c769ce9b1ce Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Thu, 12 Oct 2023 21:03:07 -0700 Subject: [PATCH 047/720] [mlir][sparse] add a forwarding insertion to SparseTensorStorage (#68939) --- .../mlir/Dialect/SparseTensor/IR/Enums.h | 5 +- .../ExecutionEngine/SparseTensor/Storage.h | 189 +++++++++++------- .../ExecutionEngine/SparseTensorRuntime.h | 35 ++-- .../Transforms/SparseTensorConversion.cpp | 2 +- .../ExecutionEngine/SparseTensor/Storage.cpp | 7 + .../ExecutionEngine/SparseTensorRuntime.cpp | 55 ++--- .../test/Dialect/SparseTensor/conversion.mlir | 2 +- .../Dialect/SparseTensor/sparse_expand.mlir | 6 +- .../SparseTensor/sparse_fill_zero.mlir | 2 +- 9 files changed, 174 insertions(+), 129 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index ca9555248130f..f1643d66c26a1 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -143,11 +143,10 @@ constexpr bool isComplexPrimaryType(PrimaryType valTy) { /// The actions performed by @newSparseTensor. 
enum class Action : uint32_t { kEmpty = 0, - // newSparseTensor no longer handles `kFromFile=1`, so we leave this - // number reserved to help catch any code that still needs updating. + kEmptyForward = 1, kFromCOO = 2, kSparseToSparse = 3, - kEmptyCOO = 4, + kFuture = 4, // not used kToCOO = 5, kToIterator = 6, kPack = 7, diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 607be1cbf956a..0d95c60a08689 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -33,7 +33,6 @@ assert((isCompressedDLT(dlt) || isSingletonDLT(dlt)) && \ "Level is neither compressed nor singleton"); \ } while (false) -#define ASSERT_DENSE_DLT(dlt) assert(isDenseDLT(dlt) && "Level is not dense"); namespace mlir { namespace sparse_tensor { @@ -44,6 +43,12 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; +//===----------------------------------------------------------------------===// +// +// SparseTensorStorage +// +//===----------------------------------------------------------------------===// + /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, mapping). In addition, @@ -97,7 +102,7 @@ class SparseTensorStorageBase { /// Safely looks up the size of the given tensor-dimension. uint64_t getDimSize(uint64_t d) const { - assert(d < getDimRank() && "Dimension is out of bounds"); + assert(d < getDimRank()); return dimSizes[d]; } @@ -106,7 +111,7 @@ class SparseTensorStorageBase { /// Safely looks up the size of the given storage-level. uint64_t getLvlSize(uint64_t l) const { - assert(l < getLvlRank() && "Level is out of bounds"); + assert(l < getLvlRank()); return lvlSizes[l]; } @@ -115,7 +120,7 @@ class SparseTensorStorageBase { /// Safely looks up the type of the given level. 
DimLevelType getLvlType(uint64_t l) const { - assert(l < getLvlRank() && "Level is out of bounds"); + assert(l < getLvlRank()); return lvlTypes[l]; } @@ -173,6 +178,13 @@ class SparseTensorStorageBase { MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES) #undef DECL_GETVALUES + /// Element-wise forwarding insertions. The first argument is the + /// dimension-coordinates for the value being inserted. +#define DECL_FORWARDINGINSERT(VNAME, V) \ + virtual void forwardingInsert(const uint64_t *, V); + MLIR_SPARSETENSOR_FOREVERY_V(DECL_FORWARDINGINSERT) +#undef DECL_FORWARDINGINSERT + /// Element-wise insertion in lexicographic coordinate order. The first /// argument is the level-coordinates for the value being inserted. #define DECL_LEXINSERT(VNAME, V) virtual void lexInsert(const uint64_t *, V); @@ -182,24 +194,17 @@ class SparseTensorStorageBase { /// Expanded insertion. Note that this method resets the /// values/filled-switch array back to all-zero/false while only /// iterating over the nonzero elements. - /// - /// Arguments: - /// * `lvlCoords` the level-coordinates shared by the values being inserted. - /// * `values` a map from last-level coordinates to their associated value. - /// * `filled` a map from last-level coordinates to bool, indicating - /// whether `values` contains a valid value to be inserted. - /// * `added` a map from `[0..count)` to last-level coordinates for - /// which `filled` is true and `values` contains the assotiated value. - /// * `count` the size of `added`. - /// * `expsz` the size of the expanded vector (verification only). #define DECL_EXPINSERT(VNAME, V) \ virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t, \ uint64_t); MLIR_SPARSETENSOR_FOREVERY_V(DECL_EXPINSERT) #undef DECL_EXPINSERT - /// Finishes insertion. - virtual void endInsert() = 0; + /// Finalizes forwarding insertions. + virtual void endForwardingInsert() = 0; + + /// Finalizes lexicographic insertions. 
+ virtual void endLexInsert() = 0; private: const std::vector dimSizes; @@ -207,6 +212,8 @@ class SparseTensorStorageBase { const std::vector lvlTypes; const std::vector dim2lvlVec; const std::vector lvl2dimVec; + +protected: const MapRef map; // non-owning pointers into dim2lvl/lvl2dim vectors }; @@ -229,7 +236,8 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const uint64_t *lvl2dim) : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim), - positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank) {} + positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank), lvlCOO() { + } public: /// Constructs a sparse tensor with the given encoding, and allocates @@ -242,11 +250,12 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const uint64_t *lvlSizes, const DimLevelType *lvlTypes, const uint64_t *dim2lvl, - const uint64_t *lvl2dim, bool initializeValuesIfAllDense); + const uint64_t *lvl2dim, SparseTensorCOO *coo, + bool initializeValuesIfAllDense); /// Constructs a sparse tensor with the given encoding, and initializes /// the contents from the COO. This ctor performs the same heuristic - /// overhead-storage allocation as the ctor taking a `bool`. + /// overhead-storage allocation as the ctor above. 
SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const DimLevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim, @@ -279,10 +288,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { static SparseTensorStorage * newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const uint64_t *lvlSizes, const DimLevelType *lvlTypes, - const uint64_t *dim2lvl, const uint64_t *lvl2dim) { - return new SparseTensorStorage( - dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, true); - } + const uint64_t *dim2lvl, const uint64_t *lvl2dim, bool forwarding); /// Allocates a new sparse tensor and initializes it from the given COO. /// The preconditions are as per the `SparseTensorStorageBase` ctor @@ -303,19 +309,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Allocates a new sparse tensor and initializes it with the contents /// of another sparse tensor. - /// - /// Preconditions: - /// * as per the `SparseTensorStorageBase` ctor. - /// * `src2lvl` must be valid for `srcRank`, must map coordinates valid - /// for `source.getDimSizes()` to coordinates valid for `lvlSizes`, - /// and therefore must be the inverse of `lvl2dim`. - /// * `source` must have the same value type `V`. - /// - /// Asserts: - /// * `dimRank` and `lvlRank` are nonzero. - /// * `srcRank == source.getDimRank()`. - /// * `lvlSizes` contains only nonzero sizes. - /// * `source.getDimSizes()` is a refinement of `dimShape`. // // TODO: The `dimRank` and `dimShape` arguments are only used for // verifying that the source tensor has the expected shape. So if we @@ -337,10 +330,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Allocates a new sparse tensor and initialize it with the data stored level /// buffers directly. - /// - /// Precondition: - /// * as per the `SparseTensorStorageBase` ctor. 
- /// * the data integrity stored in `buffers` is guaranteed by users already. static SparseTensorStorage *packFromLvlBuffers( uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, const uint64_t *lvlSizes, const DimLevelType *lvlTypes, @@ -352,12 +341,12 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Partially specialize these getter methods based on template types. void getPositions(std::vector

**out, uint64_t lvl) final { assert(out && "Received nullptr for out parameter"); - assert(lvl < getLvlRank() && "Level is out of bounds"); + assert(lvl < getLvlRank()); *out = &positions[lvl]; } void getCoordinates(std::vector **out, uint64_t lvl) final { assert(out && "Received nullptr for out parameter"); - assert(lvl < getLvlRank() && "Level is out of bounds"); + assert(lvl < getLvlRank()); *out = &coordinates[lvl]; } void getValues(std::vector **out) final { @@ -365,15 +354,23 @@ class SparseTensorStorage final : public SparseTensorStorageBase { *out = &values; } + /// Returns coordinate at given position. uint64_t getCrd(uint64_t lvl, uint64_t pos) const final { ASSERT_COMPRESSED_OR_SINGLETON_LVL(lvl); - assert(pos < coordinates[lvl].size() && "Position is out of bounds"); + assert(pos < coordinates[lvl].size()); return coordinates[lvl][pos]; // Converts the stored `C` into `uint64_t`. } + /// Partially specialize forwarding insertions based on template types. + void forwardingInsert(const uint64_t *dimCoords, V val) final { + assert(dimCoords && lvlCOO); + map.pushforward(dimCoords, lvlCursor.data()); + lvlCOO->add(lvlCursor, val); + } + /// Partially specialize lexicographical insertions based on template types. void lexInsert(const uint64_t *lvlCoords, V val) final { - assert(lvlCoords && "Received nullptr for level-coordinates"); + assert(lvlCoords); // TODO: get rid of this! canonicalize all-dense "sparse" array into dense // tensors. bool allDense = std::all_of(getLvlTypes().begin(), getLvlTypes().end(), @@ -429,8 +426,22 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } + /// Finalizes forwarding insertions. + void endForwardingInsert() final { + // Ensure lvlCOO is sorted. + assert(lvlCOO); + lvlCOO->sort(); + // Now actually insert the `elements`. 
+ const auto &elements = lvlCOO->getElements(); + const uint64_t nse = elements.size(); + assert(values.size() == 0); + values.reserve(nse); + fromCOO(elements, 0, nse, 0); + delete lvlCOO; + } + /// Finalizes lexicographic insertions. - void endInsert() final { + void endLexInsert() final { if (values.empty()) finalizeSegment(0); else @@ -533,7 +544,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// does not check that `pos` is semantically valid (i.e., larger than /// the previous position and smaller than `coordinates[lvl].capacity()`). void appendPos(uint64_t lvl, uint64_t pos, uint64_t count = 1) { - assert(isCompressedLvl(lvl) && "Level is not compressed"); + assert(isCompressedLvl(lvl)); positions[lvl].insert(positions[lvl].end(), count, detail::checkOverflowCast

(pos)); } @@ -552,7 +563,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { if (isCompressedDLT(dlt) || isSingletonDLT(dlt)) { coordinates[lvl].push_back(detail::checkOverflowCast(crd)); } else { // Dense level. - ASSERT_DENSE_DLT(dlt); + assert(isDenseDLT(dlt)); assert(crd >= full && "Coordinate was already filled"); if (crd == full) return; // Short-circuit, since it'll be a nop. @@ -572,7 +583,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { // Subscript assignment to `std::vector` requires that the `pos`-th // entry has been initialized; thus we must be sure to check `size()` // here, instead of `capacity()` as would be ideal. - assert(pos < coordinates[lvl].size() && "Position is out of bounds"); + assert(pos < coordinates[lvl].size()); coordinates[lvl][pos] = detail::checkOverflowCast(crd); } @@ -644,7 +655,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } else if (isSingletonDLT(dlt)) { return; // Nothing to finalize. } else { // Dense dimension. 
- ASSERT_DENSE_DLT(dlt); + assert(isDenseDLT(dlt)); const uint64_t sz = getLvlSizes()[l]; assert(sz >= full && "Segment is overfull"); count = detail::checkedMul(count, sz - full); @@ -663,7 +674,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { void endPath(uint64_t diffLvl) { const uint64_t lvlRank = getLvlRank(); const uint64_t lastLvl = lvlRank - 1; - assert(diffLvl <= lvlRank && "Level-diff is out of bounds"); + assert(diffLvl <= lvlRank); const uint64_t stop = lvlRank - diffLvl; for (uint64_t i = 0; i < stop; ++i) { const uint64_t l = lastLvl - i; @@ -676,7 +687,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { const uint64_t lvlRank = getLvlRank(); - assert(diffLvl <= lvlRank && "Level-diff is out of bounds"); + assert(diffLvl <= lvlRank); for (uint64_t l = diffLvl; l < lvlRank; ++l) { const uint64_t c = lvlCoords[l]; appendCrd(l, full, c); @@ -716,11 +727,17 @@ class SparseTensorStorage final : public SparseTensorStorageBase { std::vector> coordinates; std::vector values; std::vector lvlCursor; // cursor for lexicographic insertion. + SparseTensorCOO *lvlCOO; // COO used during forwarding }; #undef ASSERT_COMPRESSED_OR_SINGLETON_LVL //===----------------------------------------------------------------------===// +// +// SparseTensorEnumerator +// +//===----------------------------------------------------------------------===// + /// A (higher-order) function object for enumerating the elements of some /// `SparseTensorStorage` under a permutation. That is, the `forallElements` /// method encapsulates the loop-nest for enumerating the elements of @@ -808,7 +825,6 @@ class SparseTensorEnumeratorBase { std::vector trgCursor; // in target order. 
}; -//===----------------------------------------------------------------------===// template class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; @@ -848,8 +864,7 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { // Recover the `` type parameters of `src`. const auto &src = static_cast(this->src); if (l == src.getLvlRank()) { - assert(parentPos < src.values.size() && - "Value position is out of bounds"); + assert(parentPos < src.values.size()); // TODO: yield(this->trgCursor, src.values[parentPos]); return; @@ -860,13 +875,12 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { // Look up the bounds of the `l`-level segment determined by the // `(l - 1)`-level position `parentPos`. const std::vector

&positionsL = src.positions[l]; - assert(parentPos + 1 < positionsL.size() && - "Parent position is out of bounds"); + assert(parentPos + 1 < positionsL.size()); const uint64_t pstart = static_cast(positionsL[parentPos]); const uint64_t pstop = static_cast(positionsL[parentPos + 1]); // Loop-invariant code for looking up the `l`-level coordinates. const std::vector &coordinatesL = src.coordinates[l]; - assert(pstop <= coordinatesL.size() && "Stop position is out of bounds"); + assert(pstop <= coordinatesL.size()); for (uint64_t pos = pstart; pos < pstop; ++pos) { cursorL = static_cast(coordinatesL[pos]); forallElements(yield, pos, l + 1); @@ -875,7 +889,7 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { cursorL = src.getCrd(l, parentPos); forallElements(yield, parentPos, l + 1); } else { // Dense level. - ASSERT_DENSE_DLT(dlt); + assert(isDenseDLT(dlt)); const uint64_t sz = src.getLvlSizes()[l]; const uint64_t pstart = parentPos * sz; for (uint64_t c = 0; c < sz; ++c) { @@ -887,6 +901,11 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { }; //===----------------------------------------------------------------------===// +// +// SparseTensorNNZ +// +//===----------------------------------------------------------------------===// + /// Statistics regarding the number of nonzero subtensors in /// a source tensor, for direct sparse=>sparse conversion a la /// . @@ -959,7 +978,23 @@ class SparseTensorNNZ final { }; //===----------------------------------------------------------------------===// -// Definitions of the ctors and factories of `SparseTensorStorage`. 
+// +// SparseTensorStorage Factories +// +//===----------------------------------------------------------------------===// + +template +SparseTensorStorage *SparseTensorStorage::newEmpty( + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *dim2lvl, const uint64_t *lvl2dim, bool forwarding) { + SparseTensorCOO *lvlCOO = nullptr; + if (forwarding) + lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes); + return new SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, + lvlTypes, dim2lvl, lvl2dim, lvlCOO, + !forwarding); +} // TODO: MapRef template @@ -967,8 +1002,7 @@ SparseTensorStorage *SparseTensorStorage::newFromCOO( uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, const DimLevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim, SparseTensorCOO &lvlCOO) { - assert(dimShape && "Got nullptr for dimension shape"); - assert(lvl2dim && "Got nullptr for level-to-dimension mapping"); + assert(dimShape && dim2lvl && lvl2dim); const auto &lvlSizes = lvlCOO.getDimSizes(); assert(lvlRank == lvlSizes.size() && "Level-rank mismatch"); // Must reconstruct `dimSizes` from `lvlSizes`. 
While this is easy @@ -1026,14 +1060,21 @@ SparseTensorStorage *SparseTensorStorage::packFromLvlBuffers( return tensor; } +//===----------------------------------------------------------------------===// +// +// SparseTensorStorage Constructors +// +//===----------------------------------------------------------------------===// + template SparseTensorStorage::SparseTensorStorage( uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const uint64_t *lvlSizes, const DimLevelType *lvlTypes, - const uint64_t *dim2lvl, const uint64_t *lvl2dim, + const uint64_t *dim2lvl, const uint64_t *lvl2dim, SparseTensorCOO *coo, bool initializeValuesIfAllDense) : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim) { + lvlCOO = coo; // Provide hints on capacity of positions and coordinates. // TODO: needs much fine-tuning based on actual sparsity; currently // we reserve position/coordinate space based on all previous dense @@ -1054,7 +1095,7 @@ SparseTensorStorage::SparseTensorStorage( sz = 1; allDense = false; } else { // Dense level. - ASSERT_DENSE_DLT(dlt); + assert(isDenseDLT(dlt)); sz = detail::checkedMul(sz, lvlSizes[l]); } } @@ -1062,6 +1103,7 @@ SparseTensorStorage::SparseTensorStorage( values.resize(sz, 0); } +// TODO: share more code with forwarding methods? template SparseTensorStorage::SparseTensorStorage( // NOLINT uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, @@ -1069,14 +1111,14 @@ SparseTensorStorage::SparseTensorStorage( // NOLINT const uint64_t *lvl2dim, SparseTensorCOO &lvlCOO) : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlCOO.getDimSizes().data(), lvlTypes, dim2lvl, - lvl2dim, false) { + lvl2dim, nullptr, false) { + // Ensure lvlCOO is sorted. assert(lvlRank == lvlCOO.getDimSizes().size() && "Level-rank mismatch"); - // Ensure the preconditions of `fromCOO`. (One is already ensured by - // using `lvlSizes = lvlCOO.getDimSizes()` in the ctor above.) 
lvlCOO.sort(); // Now actually insert the `elements`. const auto &elements = lvlCOO.getElements(); const uint64_t nse = elements.size(); + assert(values.size() == 0); values.reserve(nse); fromCOO(elements, 0, nse, 0); } @@ -1123,7 +1165,7 @@ SparseTensorStorage::SparseTensorStorage( if (isCompressedDLT(dlt) || isSingletonDLT(dlt)) coordinates[l].resize(parentSz, 0); else - ASSERT_DENSE_DLT(dlt); // Future-proofing. + assert(isDenseDLT(dlt)); } values.resize(parentSz, 0); // Both allocate and zero-initialize. } @@ -1137,7 +1179,7 @@ SparseTensorStorage::SparseTensorStorage( // however, it's semantically invalid here since that entry // does not represent a segment of `coordinates[l]`. Moreover, that // entry must be immutable for `assembledSize` to remain valid. - assert(parentPos < parentSz && "Parent position is out of bounds"); + assert(parentPos < parentSz); const uint64_t currentPos = positions[l][parentPos]; // This increment won't overflow the `P` type, since it can't // exceed the original value of `positions[l][parentPos+1]` @@ -1150,12 +1192,12 @@ SparseTensorStorage::SparseTensorStorage( writeCrd(l, parentPos, lvlCoords[l]); // the new parentPos equals the old parentPos. } else { // Dense level. - ASSERT_DENSE_DLT(dlt); + assert(isDenseDLT(dlt)); parentPos = parentPos * getLvlSizes()[l] + lvlCoords[l]; } parentSz = assembledSize(parentSz, l); } - assert(parentPos < values.size() && "Value position is out of bounds"); + assert(parentPos < values.size()); values[parentPos] = val; }); // The finalizeYieldPos loop @@ -1175,8 +1217,7 @@ SparseTensorStorage::SparseTensorStorage( } else { // Both dense and singleton are no-ops for the finalizeYieldPos loop. // This assertion is for future-proofing. 
- assert((isDenseDLT(dlt) || isSingletonDLT(dlt)) && - "Level is neither dense nor singleton"); + assert((isDenseDLT(dlt) || isSingletonDLT(dlt))); } parentSz = assembledSize(parentSz, l); } @@ -1210,7 +1251,7 @@ SparseTensorStorage::SparseTensorStorage( positions[l].assign(posPtr, posPtr + parentSz + 1); coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]); } else { - assert(isDenseLvl(l) && "Level is not dense"); + assert(isDenseLvl(l)); } parentSz = assembledSize(parentSz, l); } @@ -1235,8 +1276,6 @@ SparseTensorStorage::SparseTensorStorage( values.assign(valPtr, valPtr + parentSz); } -#undef ASSERT_DENSE_DLT - } // namespace sparse_tensor } // namespace mlir diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index e723a35434584..f9312c866f363 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -37,7 +37,6 @@ extern "C" { // //===----------------------------------------------------------------------===// -/// The @newSparseTensor function for constructing a new sparse tensor. /// This is the "swiss army knife" method for materializing sparse /// tensors into the computation. The types of the `ptr` argument and /// the result depend on the action, as explained in the following table @@ -45,14 +44,13 @@ extern "C" { /// a coordinate-scheme object, and "Iterator" means an iterator object). /// /// Action: `ptr`: Returns: -/// kEmpty unused STS, empty -/// kEmptyCOO unused COO, empty -/// kFromFile char* filename STS, read from the file +/// kEmpty - STS, empty +/// kEmptyForward - STS, empty, with forwarding COO /// kFromCOO COO STS, copied from the COO source -/// kToCOO STS COO, copied from the STS source /// kSparseToSparse STS STS, copied from the STS source -/// kToIterator STS Iterator, call @getNext to use and -/// @delSparseTensorIterator to free. 
+/// kToCOO STS COO, copied from the STS source +/// kToIterator STS Iterator (@getNext/@delSparseTensorIterator) +/// kPack buffers STS, from level buffers MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -84,19 +82,15 @@ MLIR_SPARSETENSOR_FOREVERY_O(DECL_SPARSEPOSITIONS) MLIR_SPARSETENSOR_FOREVERY_O(DECL_SPARSECOORDINATES) #undef DECL_SPARSECOORDINATES -/// Coordinate-scheme method for adding a new element. -/// TODO: remove dim2lvl -#define DECL_ADDELT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_addElt##VNAME( \ - void *lvlCOO, StridedMemRefType *vref, \ - StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *dim2lvlRef); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_ADDELT) -#undef DECL_ADDELT +/// Tensor-storage method for a dim to lvl forwarding insertion. +#define DECL_FORWARDINGINSERT(VNAME, V) \ + MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_forwardingInsert##VNAME( \ + void *tensor, StridedMemRefType *vref, \ + StridedMemRefType *dimCoordsRef); \ + MLIR_SPARSETENSOR_FOREVERY_V(DECL_FORWARDINGINSERT) +#undef DECL_FORWARDINGINSERT /// Coordinate-scheme method for getting the next element while iterating. -/// The `cref` argument uses the same coordinate-space as the `iter` (which -/// can be either dim- or lvl-coords, depending on context). #define DECL_GETNEXT(VNAME, V) \ MLIR_CRUNNERUTILS_EXPORT bool _mlir_ciface_getNext##VNAME( \ void *iter, StridedMemRefType *cref, \ @@ -185,8 +179,11 @@ MLIR_CRUNNERUTILS_EXPORT index_type sparseLvlSize(void *tensor, index_type l); /// Tensor-storage method to get the size of the given dimension. MLIR_CRUNNERUTILS_EXPORT index_type sparseDimSize(void *tensor, index_type d); +/// Tensor-storage method to finalize forwarding insertions. +MLIR_CRUNNERUTILS_EXPORT void endForwardingInsert(void *tensor); + /// Tensor-storage method to finalize lexicographic insertions. 
-MLIR_CRUNNERUTILS_EXPORT void endInsert(void *tensor); +MLIR_CRUNNERUTILS_EXPORT void endLexInsert(void *tensor); /// Coordinate-scheme method to write to file in extended FROSTT format. #define DECL_OUTSPARSETENSOR(VNAME, V) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index 8e2dbcf864f97..ce3b49915319c 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -596,7 +596,7 @@ class SparseTensorLoadConverter : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { if (op.getHasInserts()) { // Finalize any pending insertions. - StringRef name = "endInsert"; + StringRef name = "endLexInsert"; createFuncCall(rewriter, op->getLoc(), name, {}, adaptor.getOperands(), EmitCInterface::Off); } diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp index 1d654cae3b4b1..050dff2da1fa4 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp @@ -80,6 +80,13 @@ MLIR_SPARSETENSOR_FOREVERY_FIXED_O(IMPL_GETCOORDINATES) MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETVALUES) #undef IMPL_GETVALUES +#define IMPL_FORWARDINGINSERT(VNAME, V) \ + void SparseTensorStorageBase::forwardingInsert(const uint64_t *, V) { \ + FATAL_PIV("forwardingInsert" #VNAME); \ + } +MLIR_SPARSETENSOR_FOREVERY_V(IMPL_FORWARDINGINSERT) +#undef IMPL_FORWARDINGINSERT + #define IMPL_LEXINSERT(VNAME, V) \ void SparseTensorStorageBase::lexInsert(const uint64_t *, V) { \ FATAL_PIV("lexInsert" #VNAME); \ diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 83ceecaf5a30e..cd1b663578a48 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -177,9 
+177,16 @@ extern "C" { #define CASE(p, c, v, P, C, V) \ if (posTp == (p) && crdTp == (c) && valTp == (v)) { \ switch (action) { \ - case Action::kEmpty: \ + case Action::kEmpty: { \ return SparseTensorStorage::newEmpty( \ - dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim); \ + dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, \ + false); \ + } \ + case Action::kEmptyForward: { \ + return SparseTensorStorage::newEmpty( \ + dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, \ + true); \ + } \ case Action::kFromCOO: { \ assert(ptr && "Received nullptr for SparseTensorCOO object"); \ auto &coo = *static_cast *>(ptr); \ @@ -193,8 +200,9 @@ extern "C" { dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, \ dimRank, tensor); \ } \ - case Action::kEmptyCOO: \ - return new SparseTensorCOO(lvlRank, lvlSizes); \ + case Action::kFuture: { \ + break; \ + } \ case Action::kToCOO: { \ assert(ptr && "Received nullptr for SparseTensorStorage object"); \ auto &tensor = *static_cast *>(ptr); \ @@ -405,29 +413,20 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: use MapRef here for translation of coordinates -// TODO: remove dim2lvl -#define IMPL_ADDELT(VNAME, V) \ - void *_mlir_ciface_addElt##VNAME( \ - void *lvlCOO, StridedMemRefType *vref, \ - StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *dim2lvlRef) { \ - assert(lvlCOO &&vref); \ +#define IMPL_FORWARDINGINSERT(VNAME, V) \ + void _mlir_ciface_forwardingInsert##VNAME( \ + void *t, StridedMemRefType *vref, \ + StridedMemRefType *dimCoordsRef) { \ + assert(t &&vref); \ ASSERT_NO_STRIDE(dimCoordsRef); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ - const uint64_t rank = MEMREF_GET_USIZE(dimCoordsRef); \ - ASSERT_USIZE_EQ(dim2lvlRef, rank); \ const index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - std::vector lvlCoords(rank); \ - 
for (uint64_t d = 0; d < rank; ++d) \ - lvlCoords[dim2lvl[d]] = dimCoords[d]; \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - static_cast *>(lvlCOO)->add(lvlCoords, *value); \ - return lvlCOO; \ + assert(dimCoords); \ + const V *value = MEMREF_GET_PAYLOAD(vref); \ + static_cast(t)->forwardingInsert(dimCoords, \ + *value); \ } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_ADDELT) -#undef IMPL_ADDELT +MLIR_SPARSETENSOR_FOREVERY_V(IMPL_FORWARDINGINSERT) +#undef IMPL_FORWARDINGINSERT // NOTE: the `cref` argument uses the same coordinate-space as the `iter` // (which can be either dim- or lvl-coords, depending on context). @@ -692,8 +691,12 @@ index_type sparseDimSize(void *tensor, index_type d) { return static_cast(tensor)->getDimSize(d); } -void endInsert(void *tensor) { - return static_cast(tensor)->endInsert(); +void endForwardingInsert(void *tensor) { + return static_cast(tensor)->endForwardingInsert(); +} + +void endLexInsert(void *tensor) { + return static_cast(tensor)->endLexInsert(); } #define IMPL_OUTSPARSETENSOR(VNAME, V) \ diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 29093a055ab2e..96300a98a6a4b 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -296,7 +296,7 @@ func.func @sparse_reconstruct(%arg0: tensor<128xf32, #SparseVector>) -> tensor<1 // CHECK-LABEL: func @sparse_reconstruct_ins( // CHECK-SAME: %[[A:.*]]: !llvm.ptr -// CHECK: call @endInsert(%[[A]]) : (!llvm.ptr) -> () +// CHECK: call @endLexInsert(%[[A]]) : (!llvm.ptr) -> () // CHECK: return %[[A]] : !llvm.ptr func.func @sparse_reconstruct_ins(%arg0: tensor<128xf32, #SparseVector>) -> tensor<128xf32, #SparseVector> { %0 = sparse_tensor.load %arg0 hasInserts : tensor<128xf32, #SparseVector> diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir index d19d7fe2871d6..9d8db10aa4230 100644 --- 
a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir @@ -62,7 +62,7 @@ // CHECK-CONVERT: memref.dealloc %[[A]] : memref // CHECK-CONVERT: memref.dealloc %[[B]] : memref // CHECK-CONVERT: memref.dealloc %[[C]] : memref -// CHECK-CONVERT: call @endInsert +// CHECK-CONVERT: call @endLexInsert // func.func @kernel(%arga: tensor) -> tensor { %c0 = arith.constant 0 : index @@ -115,7 +115,7 @@ func.func @kernel(%arga: tensor) -> tensor { // CHECK-CONVERT: memref.dealloc %[[A]] : memref // CHECK-CONVERT: memref.dealloc %[[B]] : memref // CHECK-CONVERT: memref.dealloc %[[C]] : memref -// CHECK-CONVERT: call @endInsert +// CHECK-CONVERT: call @endLexInsert // func.func @matmul1(%A: tensor<8x2xf64, #CSR>, %B: tensor<2x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> { @@ -163,7 +163,7 @@ func.func @matmul1(%A: tensor<8x2xf64, #CSR>, // CHECK-CONVERT: memref.dealloc %[[A]] : memref // CHECK-CONVERT: memref.dealloc %[[B]] : memref // CHECK-CONVERT: memref.dealloc %[[C]] : memref -// CHECK-CONVERT: call @endInsert +// CHECK-CONVERT: call @endLexInsert // func.func @matmul2(%A: tensor<8x2xf64, #CSC>, %B: tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> { diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir index 7d852ca9cc1aa..8ecbc1da965a1 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir @@ -112,7 +112,7 @@ // CHECK: memref.dealloc %[[VAL_20]] : memref<300xf64> // CHECK: memref.dealloc %[[VAL_22]] : memref<300xi1> // CHECK: memref.dealloc %[[VAL_24]] : memref<300xindex> -// CHECK: call @endInsert(%[[VAL_19]]) : (!llvm.ptr) -> () +// CHECK: call @endLexInsert(%[[VAL_19]]) : (!llvm.ptr) -> () // CHECK: return %[[VAL_19]] : !llvm.ptr // CHECK: } func.func @fill_zero_after_alloc(%arg0: tensor<100x200xf64, #DCSR>, From 4a0ccfa865437fe29ef2ecb18152df7694dddb7f Mon Sep 17 00:00:00 2001 From: Kazu 
Hirata Date: Thu, 12 Oct 2023 21:21:44 -0700 Subject: [PATCH 048/720] Use llvm::endianness::{big,little,native} (NFC) Note that llvm::support::endianness has been renamed to llvm::endianness while becoming an enum class as opposed to an enum. This patch replaces support::{big,little,native} with llvm::endianness::{big,little,native}. --- bolt/lib/Core/DebugData.cpp | 91 ++++++++++--------- bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 3 +- lld/COFF/DebugTypes.cpp | 4 +- lld/COFF/Driver.cpp | 2 +- lld/COFF/InputFiles.cpp | 4 +- lld/COFF/PDB.cpp | 13 +-- lld/ELF/Arch/Mips.cpp | 6 +- lld/ELF/Arch/RISCV.cpp | 2 +- lld/ELF/InputFiles.cpp | 6 +- lld/MachO/InputFiles.cpp | 2 +- lld/wasm/WriterUtils.cpp | 4 +- .../lldb-server/tests/MessageObjects.cpp | 13 +-- llvm/include/llvm/BinaryFormat/MsgPack.h | 2 +- llvm/include/llvm/Bitstream/BitstreamReader.h | 3 +- llvm/include/llvm/Bitstream/BitstreamWriter.h | 3 +- .../llvm/DebugInfo/MSF/MappedBlockStream.h | 8 +- .../llvm/DebugInfo/PDB/Native/FormatUtil.h | 14 ++- .../llvm/ExecutionEngine/Orc/MachOBuilder.h | 2 +- llvm/include/llvm/MC/MCMachObjectWriter.h | 3 +- llvm/include/llvm/Object/COFF.h | 3 +- llvm/include/llvm/Object/ELFObjectFile.h | 13 ++- llvm/include/llvm/Object/ELFTypes.h | 8 +- llvm/include/llvm/Object/FaultMapParser.h | 2 +- llvm/include/llvm/Object/GOFF.h | 2 +- .../llvm/ProfileData/InstrProfReader.h | 6 +- llvm/lib/DebugInfo/BTF/BTFParser.cpp | 5 +- .../CodeView/LazyRandomTypeCollection.cpp | 4 +- .../CodeView/SimpleTypeSerializer.cpp | 2 +- .../DebugInfo/CodeView/SymbolSerializer.cpp | 4 +- .../DebugInfo/CodeView/TypeIndexDiscovery.cpp | 2 +- llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 19 ++-- .../LogicalView/Readers/LVCodeViewReader.cpp | 13 +-- llvm/lib/DebugInfo/PDB/Native/InputFile.cpp | 2 +- .../JITLink/COFFLinkGraphBuilder.cpp | 3 +- .../ExecutionEngine/JITLink/ELF_aarch32.cpp | 4 +- .../lib/ExecutionEngine/JITLink/ELF_ppc64.cpp | 8 +- .../JITLink/JITLinkMemoryManager.cpp | 4 +- 
.../JITLink/MachOLinkGraphBuilder.cpp | 3 +- .../Orc/Debugging/DebugInfoSupport.cpp | 5 +- .../Orc/Debugging/DebuggerSupportPlugin.cpp | 4 +- .../RuntimeDyld/RuntimeDyldELF.cpp | 3 +- .../RuntimeDyld/RuntimeDyldImpl.h | 18 ++-- llvm/lib/InterfaceStub/ELFObjHandler.cpp | 2 +- llvm/lib/MC/DXContainerPSVInfo.cpp | 24 ++--- llvm/lib/MC/ELFObjectWriter.cpp | 9 +- llvm/lib/MC/GOFFObjectWriter.cpp | 3 +- llvm/lib/MC/MCAsmBackend.cpp | 8 +- llvm/lib/MC/MCAssembler.cpp | 2 +- llvm/lib/MC/MCCodeView.cpp | 2 +- llvm/lib/MC/MCDXContainerWriter.cpp | 2 +- llvm/lib/MC/MCDwarf.cpp | 5 +- llvm/lib/MC/MCStreamer.cpp | 2 +- llvm/lib/MC/MachObjectWriter.cpp | 2 +- llvm/lib/MC/SPIRVObjectWriter.cpp | 2 +- llvm/lib/MC/WasmObjectWriter.cpp | 4 +- llvm/lib/MC/WinCOFFObjectWriter.cpp | 2 +- llvm/lib/MC/XCOFFObjectWriter.cpp | 2 +- llvm/lib/ObjCopy/ELF/ELFObject.cpp | 9 +- llvm/lib/Object/Archive.cpp | 2 +- llvm/lib/Object/ArchiveWriter.cpp | 5 +- llvm/lib/Object/COFFObjectFile.cpp | 2 +- llvm/lib/Object/WindowsResource.cpp | 2 +- llvm/lib/ObjectYAML/COFFEmitter.cpp | 5 +- .../ObjectYAML/CodeViewYAMLDebugSections.cpp | 2 +- llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp | 4 +- llvm/lib/ObjectYAML/XCOFFEmitter.cpp | 2 +- llvm/lib/ProfileData/InstrProfWriter.cpp | 6 +- llvm/lib/ProfileData/SampleProfWriter.cpp | 9 +- llvm/lib/Remarks/YAMLRemarkParser.cpp | 4 +- llvm/lib/Support/CodeGenCoverage.cpp | 3 +- llvm/lib/Support/ELFAttributeParser.cpp | 2 +- .../MCTargetDesc/AArch64AsmBackend.cpp | 5 +- .../MCTargetDesc/AArch64MCCodeEmitter.cpp | 2 +- .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 2 +- .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 4 +- .../Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 44 +++++---- .../ARM/MCTargetDesc/ARMAsmBackendDarwin.h | 3 +- .../ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h | 2 +- .../ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 3 +- .../Target/AVR/MCTargetDesc/AVRAsmBackend.h | 2 +- .../Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 6 +- .../BPF/MCTargetDesc/BPFMCCodeEmitter.cpp 
| 4 +- .../CSKY/MCTargetDesc/CSKYAsmBackend.cpp | 2 +- .../Target/CSKY/MCTargetDesc/CSKYAsmBackend.h | 2 +- .../CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp | 5 +- .../MCTargetDesc/DirectXMCTargetDesc.cpp | 3 +- .../MCTargetDesc/HexagonAsmBackend.cpp | 7 +- .../MCTargetDesc/HexagonMCCodeEmitter.cpp | 2 +- .../Lanai/MCTargetDesc/LanaiAsmBackend.cpp | 2 +- .../Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp | 2 +- .../MCTargetDesc/LoongArchAsmBackend.h | 4 +- .../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 4 +- .../M68k/MCTargetDesc/M68kAsmBackend.cpp | 2 +- .../Target/M68k/MCTargetDesc/M68kBaseInfo.h | 4 +- .../M68k/MCTargetDesc/M68kMCCodeEmitter.cpp | 2 +- .../MSP430/MCTargetDesc/MSP430AsmBackend.cpp | 2 +- .../MCTargetDesc/MSP430MCCodeEmitter.cpp | 3 +- .../Mips/MCTargetDesc/MipsAsmBackend.cpp | 6 +- .../Target/Mips/MCTargetDesc/MipsAsmBackend.h | 3 +- .../Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 3 +- .../PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 8 +- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 3 +- .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 8 +- .../RISCV/MCTargetDesc/RISCVAsmBackend.h | 4 +- .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 16 ++-- .../SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp | 2 +- .../SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp | 6 +- .../Sparc/MCTargetDesc/SparcAsmBackend.cpp | 10 +- .../Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 5 +- .../MCTargetDesc/SystemZMCAsmBackend.cpp | 3 +- .../Target/VE/MCTargetDesc/VEAsmBackend.cpp | 8 +- .../VE/MCTargetDesc/VEMCCodeEmitter.cpp | 2 +- .../MCTargetDesc/WebAssemblyAsmBackend.cpp | 2 +- .../MCTargetDesc/WebAssemblyMCCodeEmitter.cpp | 16 ++-- .../X86/Disassembler/X86Disassembler.cpp | 2 +- .../Target/X86/MCTargetDesc/X86AsmBackend.cpp | 2 +- .../Xtensa/MCTargetDesc/XtensaAsmBackend.cpp | 3 +- .../llvm-exegesis/lib/X86/X86Counter.cpp | 3 +- llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 5 +- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 3 +- llvm/tools/llvm-objdump/llvm-objdump.cpp | 7 +- 
llvm/tools/llvm-rc/ResourceFileWriter.cpp | 2 +- llvm/tools/llvm-rc/ResourceFileWriter.h | 4 +- llvm/tools/llvm-readobj/COFFDumper.cpp | 11 ++- llvm/tools/llvm-readobj/ELFDumper.cpp | 18 ++-- llvm/tools/llvm-readobj/MachODumper.cpp | 4 +- .../llvm-readobj/WindowsResourceDumper.cpp | 2 +- llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp | 4 +- llvm/tools/obj2yaml/coff2yaml.cpp | 2 +- .../CodeView/RandomAccessVisitorTest.cpp | 2 +- .../ExecutionEngine/JITLink/JITLinkMocks.cpp | 3 +- .../JITLink/LinkGraphTests.cpp | 34 +++---- .../JITLink/MemoryManagerErrorTests.cpp | 3 +- .../ExecutionEngine/JITLink/StubsTests.cpp | 12 +-- .../Orc/ObjectLinkingLayerTest.cpp | 18 ++-- llvm/unittests/MC/StringTableBuilderTest.cpp | 4 +- llvm/unittests/ProfileData/InstrProfTest.cpp | 6 +- llvm/unittests/Support/ARMAttributeParser.cpp | 4 +- llvm/unittests/Support/BinaryStreamTest.cpp | 8 +- .../Support/CSKYAttributeParserTest.cpp | 6 +- .../Support/ELFAttributeParserTest.cpp | 2 +- .../Support/RISCVAttributeParserTest.cpp | 2 +- 142 files changed, 463 insertions(+), 391 deletions(-) diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp index 22ca8c5acccf1..7a532fbbb5c2e 100644 --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -124,14 +124,14 @@ writeAddressRanges(raw_svector_ostream &Stream, const DebugAddressRangesVector &AddressRanges, const bool WriteRelativeRanges = false) { for (const DebugAddressRange &Range : AddressRanges) { - support::endian::write(Stream, Range.LowPC, support::little); + support::endian::write(Stream, Range.LowPC, llvm::endianness::little); support::endian::write( Stream, WriteRelativeRanges ? Range.HighPC - Range.LowPC : Range.HighPC, - support::little); + llvm::endianness::little); } // Finish with 0 entries. 
- support::endian::write(Stream, 0ULL, support::little); - support::endian::write(Stream, 0ULL, support::little); + support::endian::write(Stream, 0ULL, llvm::endianness::little); + support::endian::write(Stream, 0ULL, llvm::endianness::little); return AddressRanges.size() * 16 + 16; } @@ -209,13 +209,15 @@ getDWARF5Header(const LocListsRangelistsHeader &Header) { getDWARF5RngListLocListHeaderSize() - sizeof(UnitLengthType); support::endian::write(*HeaderStream, Header.UnitLength + HeaderSize, - support::little); - support::endian::write(*HeaderStream, Header.Version, support::little); - support::endian::write(*HeaderStream, Header.AddressSize, support::little); + llvm::endianness::little); + support::endian::write(*HeaderStream, Header.Version, + llvm::endianness::little); + support::endian::write(*HeaderStream, Header.AddressSize, + llvm::endianness::little); support::endian::write(*HeaderStream, Header.SegmentSelector, - support::little); + llvm::endianness::little); support::endian::write(*HeaderStream, Header.OffsetEntryCount, - support::little); + llvm::endianness::little); return HeaderBuffer; } @@ -254,17 +256,18 @@ static bool emitWithBase(raw_ostream &OS, const DebugVector &Entries, } support::endian::write(OS, static_cast(BaseAddressx), - support::little); + llvm::endianness::little); uint32_t BaseIndex = AddrWriter.getIndexFromAddress(Base, CU); encodeULEB128(BaseIndex, OS); for (auto &OffsetEntry : Offsets) { support::endian::write(OS, static_cast(OffsetPair), - support::little); + llvm::endianness::little); encodeULEB128(OffsetEntry.StartOffset, OS); encodeULEB128(OffsetEntry.EndOffset, OS); Func(OffsetEntry.Index); } - support::endian::write(OS, static_cast(EndOfList), support::little); + support::endian::write(OS, static_cast(EndOfList), + llvm::endianness::little); return true; } @@ -291,7 +294,7 @@ DebugRangeListsSectionWriter::addRanges(DebugAddressRangesVector &Ranges) { const DebugAddressRange &Range = Ranges[I]; 
support::endian::write(*CUBodyStream, static_cast(dwarf::DW_RLE_startx_length), - support::little); + llvm::endianness::little); uint32_t Index = AddrWriter->getIndexFromAddress(Range.LowPC, *CU); encodeULEB128(Index, *CUBodyStream); encodeULEB128(Range.HighPC - Range.LowPC, *CUBodyStream); @@ -301,7 +304,7 @@ DebugRangeListsSectionWriter::addRanges(DebugAddressRangesVector &Ranges) { if (WrittenStartxLength) support::endian::write(*CUBodyStream, static_cast(dwarf::DW_RLE_end_of_list), - support::little); + llvm::endianness::little); CurrentOffset = CUBodyBuffer->size(); return RangeEntries.size() - 1; } @@ -315,7 +318,7 @@ void DebugRangeListsSectionWriter::finalizeSection() { const uint32_t SizeOfArraySection = RangeEntries.size() * SizeOfArrayEntry; for (uint32_t Offset : RangeEntries) support::endian::write(*CUArrayStream, Offset + SizeOfArraySection, - support::little); + llvm::endianness::little); std::unique_ptr Header = getDWARF5Header( {static_cast(SizeOfArraySection + CUBodyBuffer.get()->size()), @@ -359,17 +362,17 @@ void DebugARangesSectionWriter::writeARangesSection( uint32_t Size = 8 + 4 + 2 * sizeof(uint64_t) * (AddressRanges.size() + 1); // Header field #1: set size. - support::endian::write(RangesStream, Size, support::little); + support::endian::write(RangesStream, Size, llvm::endianness::little); // Header field #2: version number, 2 as per the specification. support::endian::write(RangesStream, static_cast(2), - support::little); + llvm::endianness::little); assert(CUMap.count(Offset) && "Original CU offset is not found in CU Map"); // Header field #3: debug info offset of the correspondent compile unit. support::endian::write( RangesStream, static_cast(CUMap.find(Offset)->second.Offset), - support::little); + llvm::endianness::little); // Header field #4: address size. // 8 since we only write ELF64 binaries for now. 
@@ -380,7 +383,7 @@ void DebugARangesSectionWriter::writeARangesSection( // Padding before address table - 4 bytes in the 64-bit-pointer case. support::endian::write(RangesStream, static_cast(0), - support::little); + llvm::endianness::little); writeAddressRanges(RangesStream, AddressRanges, true); } @@ -473,10 +476,10 @@ void DebugAddrWriter::update(DIEBuilder &DIEBlder, DWARFUnit &CU) { break; case 4: support::endian::write(*AddressStream, static_cast(Address), - support::little); + llvm::endianness::little); break; case 8: - support::endian::write(*AddressStream, Address, support::little); + support::endian::write(*AddressStream, Address, llvm::endianness::little); break; } }; @@ -492,11 +495,12 @@ void DebugAddrWriter::update(DIEBuilder &DIEBlder, DWARFUnit &CU) { void DebugAddrWriterDwarf5::update(DIEBuilder &DIEBlder, DWARFUnit &CU) { // Need to layout all sections within .debug_addr // Within each section sort Address by index. - const endianness Endian = - BC->DwCtx->isLittleEndian() ? support::little : support::big; + const endianness Endian = BC->DwCtx->isLittleEndian() + ? 
llvm::endianness::little + : llvm::endianness::big; const DWARFSection &AddrSec = BC->DwCtx->getDWARFObj().getAddrSection(); DWARFDataExtractor AddrData(BC->DwCtx->getDWARFObj(), AddrSec, - Endian == support::little, 0); + Endian == llvm::endianness::little, 0); DWARFDebugAddrTable AddrTable; DIDumpOptions DumpOpts; constexpr uint32_t HeaderSize = 8; @@ -594,11 +598,11 @@ void DebugLocWriter::addList(DIEBuilder &DIEBldr, DIE &Die, DIEValue &AttrInfo, for (const DebugLocationEntry &Entry : LocList) { support::endian::write(*LocStream, static_cast(Entry.LowPC), - support::little); + llvm::endianness::little); support::endian::write(*LocStream, static_cast(Entry.HighPC), - support::little); + llvm::endianness::little); support::endian::write(*LocStream, static_cast(Entry.Expr.size()), - support::little); + llvm::endianness::little); *LocStream << StringRef(reinterpret_cast(Entry.Expr.data()), Entry.Expr.size()); LocSectionOffset += 2 * 8 + 2 + Entry.Expr.size(); @@ -618,15 +622,17 @@ std::unique_ptr DebugLocWriter::getBuffer() { void DebugLocWriter::finalize(DIEBuilder &DIEBldr, DIE &Die) {} static void writeEmptyListDwarf5(raw_svector_ostream &Stream) { - support::endian::write(Stream, static_cast(4), support::little); + support::endian::write(Stream, static_cast(4), + llvm::endianness::little); support::endian::write(Stream, static_cast(dwarf::DW_LLE_start_end), - support::little); + llvm::endianness::little); const char Zeroes[16] = {0}; Stream << StringRef(Zeroes, 16); encodeULEB128(0, Stream); - support::endian::write( - Stream, static_cast(dwarf::DW_LLE_end_of_list), support::little); + support::endian::write(Stream, + static_cast(dwarf::DW_LLE_end_of_list), + llvm::endianness::little); } static void writeLegacyLocList(DIEValue &AttrInfo, @@ -645,21 +651,21 @@ static void writeLegacyLocList(DIEValue &AttrInfo, for (const DebugLocationEntry &Entry : LocList) { support::endian::write(LocStream, static_cast(dwarf::DW_LLE_startx_length), - support::little); + 
llvm::endianness::little); const uint32_t Index = AddrWriter.getIndexFromAddress(Entry.LowPC, CU); encodeULEB128(Index, LocStream); support::endian::write(LocStream, static_cast(Entry.HighPC - Entry.LowPC), - support::little); + llvm::endianness::little); support::endian::write(LocStream, static_cast(Entry.Expr.size()), - support::little); + llvm::endianness::little); LocStream << StringRef(reinterpret_cast(Entry.Expr.data()), Entry.Expr.size()); } support::endian::write(LocStream, static_cast(dwarf::DW_LLE_end_of_list), - support::little); + llvm::endianness::little); replaceLocValbyForm(DIEBldr, Die, AttrInfo, AttrInfo.getForm(), EntryOffset); } @@ -701,7 +707,7 @@ static void writeDWARF5LocList(uint32_t &NumberOfEntries, DIEValue &AttrInfo, const DebugLocationEntry &Entry = LocList[I]; support::endian::write(LocBodyStream, static_cast(dwarf::DW_LLE_startx_length), - support::little); + llvm::endianness::little); const uint32_t Index = AddrWriter.getIndexFromAddress(Entry.LowPC, CU); encodeULEB128(Index, LocBodyStream); encodeULEB128(Entry.HighPC - Entry.LowPC, LocBodyStream); @@ -713,7 +719,7 @@ static void writeDWARF5LocList(uint32_t &NumberOfEntries, DIEValue &AttrInfo, if (WrittenStartxLength) support::endian::write(LocBodyStream, static_cast(dwarf::DW_LLE_end_of_list), - support::little); + llvm::endianness::little); } void DebugLoclistWriter::addList(DIEBuilder &DIEBldr, DIE &Die, @@ -753,7 +759,7 @@ void DebugLoclistWriter::finalizeDWARF5(DIEBuilder &DIEBldr, DIE &Die) { support::endian::write( *LocArrayStream, static_cast(SizeOfArraySection + RelativeOffset), - support::little); + llvm::endianness::little); std::unique_ptr Header = getDWARF5Header( {static_cast(SizeOfArraySection + LocBodyBuffer.get()->size()), @@ -884,11 +890,11 @@ void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit, if (RetVal == ProcessedBaseOffsets.end() || StrOffsetSectionWasModified) { // Writing out the header for each section. 
support::endian::write(*StrOffsetsStream, CurrentSectionSize + 4, - support::little); + llvm::endianness::little); support::endian::write(*StrOffsetsStream, static_cast(5), - support::little); + llvm::endianness::little); support::endian::write(*StrOffsetsStream, static_cast(0), - support::little); + llvm::endianness::little); uint64_t BaseOffset = StrOffsetsBuffer->size(); ProcessedBaseOffsets[*Val] = BaseOffset; @@ -897,7 +903,8 @@ void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit, StrListBaseAttrInfo.getForm(), DIEInteger(BaseOffset)); for (const auto &Entry : IndexToAddressMap) - support::endian::write(*StrOffsetsStream, Entry.second, support::little); + support::endian::write(*StrOffsetsStream, Entry.second, + llvm::endianness::little); } else { DIEBldr.replaceValue(&Die, dwarf::DW_AT_str_offsets_base, StrListBaseAttrInfo.getForm(), diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 316b83cfbd38a..51038dbead330 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -248,7 +248,8 @@ void PseudoProbeRewriter::encodePseudoProbes() { auto EmitInt = [&](uint64_t Value, uint32_t Size) { const bool IsLittleEndian = BC.AsmInfo->isLittleEndian(); uint64_t Swapped = support::endian::byte_swap( - Value, IsLittleEndian ? support::little : support::big); + Value, + IsLittleEndian ? llvm::endianness::little : llvm::endianness::big); unsigned Index = IsLittleEndian ? 
0 : 8 - Size; auto Entry = StringRef(reinterpret_cast(&Swapped) + Index, Size); Contents.append(Entry.begin(), Entry.end()); diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 5071b7b79d23e..a4c808e4c9a04 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -311,7 +311,7 @@ Error TpiSource::mergeDebugT(TypeMerger *m) { "use remapTpiWithGHashes when ghash is enabled"); CVTypeArray types; - BinaryStreamReader reader(file->debugTypes, support::little); + BinaryStreamReader reader(file->debugTypes, llvm::endianness::little); cantFail(reader.readArray(types, reader.getLength())); // When dealing with PCH.OBJ, some indices were already merged. @@ -588,7 +588,7 @@ void TpiSource::loadGHashes() { ownedGHashes = false; } else { CVTypeArray types; - BinaryStreamReader reader(file->debugTypes, support::little); + BinaryStreamReader reader(file->debugTypes, llvm::endianness::little); cantFail(reader.readArray(types, reader.getLength())); assignGHashesFromVector(GloballyHashedType::hashTypes(types)); } diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 278f5e71b14f5..0fbfefdf43cf1 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1210,7 +1210,7 @@ static void readCallGraphsFromObjectFiles(COFFLinkerContext &ctx) { ArrayRef contents; cantFail( obj->getCOFFObj()->getSectionContents(obj->callgraphSec, contents)); - BinaryStreamReader reader(contents, support::little); + BinaryStreamReader reader(contents, llvm::endianness::little); while (!reader.empty()) { uint32_t fromIndex, toIndex; uint64_t count; diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index a7a08fb2fa6ea..b66ef418b3039 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -709,7 +709,7 @@ void ObjFile::initializeFlags() { DebugSubsectionArray subsections; - BinaryStreamReader reader(data, support::little); + BinaryStreamReader reader(data, llvm::endianness::little); ExitOnError exitOnErr; exitOnErr(reader.readArray(subsections, 
data.size())); @@ -775,7 +775,7 @@ void ObjFile::initializeDependencies() { // Get the first type record. It will indicate if this object uses a type // server (/Zi) or a PCH file (/Yu). CVTypeArray types; - BinaryStreamReader reader(data, support::little); + BinaryStreamReader reader(data, llvm::endianness::little); cantFail(reader.readArray(types, reader.getLength())); CVTypeArray::Iterator firstType = types.begin(); if (firstType == types.end()) diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 0c4e0a80cf9a1..f77ff0d4eab80 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -656,7 +656,7 @@ Error PDBLinker::writeAllModuleSymbolRecords(ObjFile *file, auto contents = SectionChunk::consumeDebugMagic(sectionContents, ".debug$S"); DebugSubsectionArray subsections; - BinaryStreamReader reader(contents, support::little); + BinaryStreamReader reader(contents, llvm::endianness::little); exitOnErr(reader.readArray(subsections, contents.size())); uint32_t nextRelocIndex = 0; @@ -758,7 +758,7 @@ void DebugSHandler::handleDebugS(SectionChunk *debugChunk) { ArrayRef contents = debugChunk->getContents(); contents = SectionChunk::consumeDebugMagic(contents, ".debug$S"); DebugSubsectionArray subsections; - BinaryStreamReader reader(contents, support::little); + BinaryStreamReader reader(contents, llvm::endianness::little); ExitOnError exitOnErr; exitOnErr(reader.readArray(subsections, contents.size())); debugChunk->sortRelocations(); @@ -868,7 +868,7 @@ Error UnrelocatedDebugSubsection::commit(BinaryStreamWriter &writer) const { debugChunk->file->debugTypesObj) { TpiSource *source = debugChunk->file->debugTypesObj; DebugInlineeLinesSubsectionRef inlineeLines; - BinaryStreamReader storageReader(relocatedBytes, support::little); + BinaryStreamReader storageReader(relocatedBytes, llvm::endianness::little); ExitOnError exitOnErr; exitOnErr(inlineeLines.initialize(storageReader)); for (const InlineeSourceLine &line : inlineeLines) { @@ -962,7 +962,7 @@ void 
DebugSHandler::finish() { // Copy each frame data record, add in rvaStart, translate string table // indices, and add the record to the PDB. DebugFrameDataSubsectionRef fds; - BinaryStreamReader reader(subsecData, support::little); + BinaryStreamReader reader(subsecData, llvm::endianness::little); exitOnErr(fds.initialize(reader)); for (codeview::FrameData fd : fds) { fd.RvaStart += rvaStart; @@ -1050,7 +1050,8 @@ void PDBLinker::addDebugSymbols(TpiSource *source) { ArrayRef relocatedDebugContents = relocateDebugChunk(*debugChunk); FixedStreamArray fpoRecords; - BinaryStreamReader reader(relocatedDebugContents, support::little); + BinaryStreamReader reader(relocatedDebugContents, + llvm::endianness::little); uint32_t count = relocatedDebugContents.size() / sizeof(object::FpoData); exitOnErr(reader.readArray(fpoRecords, count)); @@ -1772,7 +1773,7 @@ static bool findLineTable(const SectionChunk *c, uint32_t addr, ArrayRef contents = SectionChunk::consumeDebugMagic(dbgC->getContents(), ".debug$S"); DebugSubsectionArray subsections; - BinaryStreamReader reader(contents, support::little); + BinaryStreamReader reader(contents, llvm::endianness::little); exitOnErr(reader.readArray(subsections, contents.size())); for (const DebugSubsectionRecord &ss : subsections) { diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index d5a335c659322..d6c70aeba95dd 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -219,7 +219,7 @@ template static uint32_t readShuffle(const uint8_t *loc) { // words in a big-endian order. That is why we have to swap these // words to get a correct value. uint32_t v = read32(loc); - if (E == support::little) + if (E == llvm::endianness::little) return (v << 16) | (v >> 16); return v; } @@ -237,12 +237,12 @@ static void writeShuffleValue(uint8_t *loc, uint64_t v, uint8_t bitsSize, uint8_t shift) { // See comments in readShuffle for purpose of this code. 
uint16_t *words = (uint16_t *)loc; - if (E == support::little) + if (E == llvm::endianness::little) std::swap(words[0], words[1]); writeValue(loc, v, bitsSize, shift); - if (E == support::little) + if (E == llvm::endianness::little) std::swap(words[0], words[1]); } diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index d0d75118e30dd..6413dcd7dcd79 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -933,7 +933,7 @@ mergeAttributesSection(const SmallVector §ions) { const auto &attributesTags = RISCVAttrs::getRISCVAttributeTags(); for (const InputSectionBase *sec : sections) { RISCVAttributeParser parser; - if (Error e = parser.parse(sec->content(), support::little)) + if (Error e = parser.parse(sec->content(), llvm::endianness::little)) warn(toString(sec) + ": " + llvm::toString(std::move(e))); for (const auto &tag : attributesTags) { switch (RISCVAttrs::AttrType(tag.attr)) { diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 3413586f6b854..a0d4be8ff9885 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -604,9 +604,9 @@ template void ObjFile::parse(bool ignoreComdats) { check(this->getObj().getSectionContents(sec)); StringRef name = check(obj.getSectionName(sec, shstrtab)); this->sections[i] = &InputSection::discarded; - if (Error e = - attributes.parse(contents, ekind == ELF32LEKind ? support::little - : support::big)) { + if (Error e = attributes.parse(contents, ekind == ELF32LEKind + ? 
llvm::endianness::little + : llvm::endianness::big)) { InputSection isec(*this, sec, name); warn(toString(&isec) + ": " + llvm::toString(std::move(e))); } else { diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 8f737beee768b..09c6ea9b19b5d 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -320,7 +320,7 @@ static std::optional getRecordSize(StringRef segname, StringRef name) { static Error parseCallGraph(ArrayRef data, std::vector &callGraph) { TimeTraceScope timeScope("Parsing call graph section"); - BinaryStreamReader reader(data, support::little); + BinaryStreamReader reader(data, llvm::endianness::little); while (!reader.empty()) { uint32_t fromIndex, toIndex; uint64_t count; diff --git a/lld/wasm/WriterUtils.cpp b/lld/wasm/WriterUtils.cpp index ead22291b5ebd..cc8ed0b1de237 100644 --- a/lld/wasm/WriterUtils.cpp +++ b/lld/wasm/WriterUtils.cpp @@ -111,12 +111,12 @@ void writeU8(raw_ostream &os, uint8_t byte, const Twine &msg) { void writeU32(raw_ostream &os, uint32_t number, const Twine &msg) { debugWrite(os.tell(), msg + "[0x" + utohexstr(number) + "]"); - support::endian::write(os, number, support::little); + support::endian::write(os, number, llvm::endianness::little); } void writeU64(raw_ostream &os, uint64_t number, const Twine &msg) { debugWrite(os.tell(), msg + "[0x" + utohexstr(number) + "]"); - support::endian::write(os, number, support::little); + support::endian::write(os, number, llvm::endianness::little); } void writeValueType(raw_ostream &os, ValType type, const Twine &msg) { diff --git a/lldb/unittests/tools/lldb-server/tests/MessageObjects.cpp b/lldb/unittests/tools/lldb-server/tests/MessageObjects.cpp index 7ccc9210daad0..da4dc10d4b87c 100644 --- a/lldb/unittests/tools/lldb-server/tests/MessageObjects.cpp +++ b/lldb/unittests/tools/lldb-server/tests/MessageObjects.cpp @@ -42,9 +42,9 @@ Expected ProcessInfo::create(StringRef response) { process_info.m_triple = fromHex(elements["triple"]); StringRef 
endian_str = elements["endian"]; if (endian_str == "little") - process_info.m_endian = support::little; + process_info.m_endian = llvm::endianness::little; else if (endian_str == "big") - process_info.m_endian = support::big; + process_info.m_endian = llvm::endianness::big; else return make_parsing_error("ProcessInfo: endian"); @@ -84,7 +84,7 @@ JThreadsInfo::parseRegisters(const StructuredData::Dictionary &Dict, return make_parsing_error("JThreadsInfo: register key[{0}]", i); auto RegValOr = - parseRegisterValue(RegInfos[Register], ValueStr, support::big); + parseRegisterValue(RegInfos[Register], ValueStr, llvm::endianness::big); if (!RegValOr) return RegValOr.takeError(); Result[Register] = std::move(*RegValOr); @@ -214,9 +214,10 @@ Expected parseRegisterValue(const RegisterInfo &Info, StringExtractor(HexValue).GetHexBytes(Bytes, '\xcc'); RegisterValue Value; Status ST; - Value.SetFromMemoryData( - Info, Bytes.data(), Bytes.size(), - Endian == support::little ? eByteOrderLittle : eByteOrderBig, ST); + Value.SetFromMemoryData(Info, Bytes.data(), Bytes.size(), + Endian == llvm::endianness::little ? eByteOrderLittle + : eByteOrderBig, + ST); if (ST.Fail()) return ST.ToError(); return Value; diff --git a/llvm/include/llvm/BinaryFormat/MsgPack.h b/llvm/include/llvm/BinaryFormat/MsgPack.h index 7fe6442e33737..01edae0ee0ebd 100644 --- a/llvm/include/llvm/BinaryFormat/MsgPack.h +++ b/llvm/include/llvm/BinaryFormat/MsgPack.h @@ -21,7 +21,7 @@ namespace llvm { namespace msgpack { /// The endianness of all multi-byte encoded values in MessagePack. -constexpr llvm::endianness Endianness = support::big; +constexpr llvm::endianness Endianness = llvm::endianness::big; /// The first byte identifiers of MessagePack object formats. 
namespace FirstByte { diff --git a/llvm/include/llvm/Bitstream/BitstreamReader.h b/llvm/include/llvm/Bitstream/BitstreamReader.h index 978ab7c2422b4..dbc98d1ad7258 100644 --- a/llvm/include/llvm/Bitstream/BitstreamReader.h +++ b/llvm/include/llvm/Bitstream/BitstreamReader.h @@ -168,7 +168,8 @@ class SimpleBitstreamCursor { unsigned BytesRead; if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) { BytesRead = sizeof(word_t); - CurWord = support::endian::read(NextCharPtr); + CurWord = + support::endian::read(NextCharPtr); } else { // Short read. BytesRead = BitcodeBytes.size() - NextChar; diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h index 423af4c2cc6c0..8a59d0444e367 100644 --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -75,7 +75,8 @@ class BitstreamWriter { std::vector BlockInfoRecords; void WriteWord(unsigned Value) { - Value = support::endian::byte_swap(Value); + Value = + support::endian::byte_swap(Value); Out.append(reinterpret_cast(&Value), reinterpret_cast(&Value + 1)); } diff --git a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h index 39593b759bb5e..04a64d28e0ccd 100644 --- a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h +++ b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h @@ -54,7 +54,9 @@ class MappedBlockStream : public BinaryStream { createDirectoryStream(const MSFLayout &Layout, BinaryStreamRef MsfData, BumpPtrAllocator &Allocator); - llvm::endianness getEndian() const override { return support::little; } + llvm::endianness getEndian() const override { + return llvm::endianness::little; + } Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef &Buffer) override; @@ -119,7 +121,9 @@ class WritableMappedBlockStream : public WritableBinaryStream { createFpmStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator, bool AltFpm = 
false); - llvm::endianness getEndian() const override { return support::little; } + llvm::endianness getEndian() const override { + return llvm::endianness::little; + } Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef &Buffer) override; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h index ed745eaf97274..01de8b49dd78f 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h @@ -107,10 +107,9 @@ namespace detail { template struct EndianAdapter final : public FormatAdapter> { - using EndianType = - support::detail::packed_endian_specific_integral; + T, llvm::endianness::little, support::unaligned>> { + using EndianType = support::detail::packed_endian_specific_integral< + T, llvm::endianness::little, support::unaligned>; explicit EndianAdapter(EndianType &&Item) : FormatAdapter(std::move(Item)) {} @@ -122,10 +121,9 @@ struct EndianAdapter final } // namespace detail template -detail::EndianAdapter -fmtle(support::detail::packed_endian_specific_integral - Value) { +detail::EndianAdapter fmtle(support::detail::packed_endian_specific_integral< + T, llvm::endianness::little, support::unaligned> + Value) { return detail::EndianAdapter(std::move(Value)); } } // namespace pdb diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOBuilder.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOBuilder.h index fba688309ec19..2bc66b11e2704 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOBuilder.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOBuilder.h @@ -507,7 +507,7 @@ struct MachO64LE { using NList = MachO::nlist_64; using Relocation = MachO::relocation_info; - static constexpr llvm::endianness Endianness = support::little; + static constexpr llvm::endianness Endianness = llvm::endianness::little; static constexpr uint32_t Magic = MachO::MH_MAGIC_64; static constexpr MachO::LoadCommandType SegmentCmd = MachO::LC_SEGMENT_64; 
static constexpr MachO::LoadCommandType SymTabCmd = MachO::LC_SYMTAB; diff --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h index 05d816671b1a4..1683543082e28 100644 --- a/llvm/include/llvm/MC/MCMachObjectWriter.h +++ b/llvm/include/llvm/MC/MCMachObjectWriter.h @@ -131,7 +131,8 @@ class MachObjectWriter : public MCObjectWriter { : TargetObjectWriter(std::move(MOTW)), StringTable(TargetObjectWriter->is64Bit() ? StringTableBuilder::MachO64 : StringTableBuilder::MachO), - W(OS, IsLittleEndian ? support::little : support::big) {} + W(OS, + IsLittleEndian ? llvm::endianness::little : llvm::endianness::big) {} support::endian::Writer W; diff --git a/llvm/include/llvm/Object/COFF.h b/llvm/include/llvm/Object/COFF.h index 24ed885d7b656..a548b2c15c5fd 100644 --- a/llvm/include/llvm/Object/COFF.h +++ b/llvm/include/llvm/Object/COFF.h @@ -1298,7 +1298,8 @@ class BaseRelocRef { class ResourceSectionRef { public: ResourceSectionRef() = default; - explicit ResourceSectionRef(StringRef Ref) : BBS(Ref, support::little) {} + explicit ResourceSectionRef(StringRef Ref) + : BBS(Ref, llvm::endianness::little) {} Error load(const COFFObjectFile *O); Error load(const COFFObjectFile *O, const SectionRef &S); diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index dc3d6bb58710c..d7947d85739eb 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -458,8 +458,9 @@ template class ELFObjectFile : public ELFObjectFileBase { bool isDyldType() const { return isDyldELFObject; } static bool classof(const Binary *v) { - return v->getType() == getELFType(ELFT::TargetEndianness == support::little, - ELFT::Is64Bits); + return v->getType() == + getELFType(ELFT::TargetEndianness == llvm::endianness::little, + ELFT::Is64Bits); } elf_symbol_iterator_range getDynamicSymbolIterators() const override; @@ -1128,7 +1129,8 @@ ELFObjectFile::ELFObjectFile(MemoryBufferRef 
Object, ELFFile EF, const Elf_Shdr *DotSymtabSec, const Elf_Shdr *DotSymtabShndx) : ELFObjectFileBase( - getELFType(ELFT::TargetEndianness == support::little, ELFT::Is64Bits), + getELFType(ELFT::TargetEndianness == llvm::endianness::little, + ELFT::Is64Bits), Object), EF(EF), DotDynSymSec(DotDynSymSec), DotSymtabSec(DotSymtabSec), DotSymtabShndxSec(DotSymtabShndx) {} @@ -1197,7 +1199,8 @@ uint8_t ELFObjectFile::getBytesInAddress() const { template StringRef ELFObjectFile::getFileFormatName() const { - constexpr bool IsLittleEndian = ELFT::TargetEndianness == support::little; + constexpr bool IsLittleEndian = + ELFT::TargetEndianness == llvm::endianness::little; switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { case ELF::ELFCLASS32: switch (EF.getHeader().e_machine) { @@ -1275,7 +1278,7 @@ StringRef ELFObjectFile::getFileFormatName() const { } template Triple::ArchType ELFObjectFile::getArch() const { - bool IsLittleEndian = ELFT::TargetEndianness == support::little; + bool IsLittleEndian = ELFT::TargetEndianness == llvm::endianness::little; switch (EF.getHeader().e_machine) { case ELF::EM_68K: return Triple::m68k; diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index f7a1d02e534c5..45fc52288bdd4 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -90,10 +90,10 @@ template struct ELFType { using Off = packed; }; -using ELF32LE = ELFType; -using ELF32BE = ELFType; -using ELF64LE = ELFType; -using ELF64BE = ELFType; +using ELF32LE = ELFType; +using ELF32BE = ELFType; +using ELF64LE = ELFType; +using ELF64BE = ELFType; // Use an alignment of 2 for the typedefs since that is the worst case for // ELF files in archives. 
diff --git a/llvm/include/llvm/Object/FaultMapParser.h b/llvm/include/llvm/Object/FaultMapParser.h index bed2dba154f3c..028d3900d9452 100644 --- a/llvm/include/llvm/Object/FaultMapParser.h +++ b/llvm/include/llvm/Object/FaultMapParser.h @@ -42,7 +42,7 @@ class FaultMapParser { template static T read(const uint8_t *P, const uint8_t *E) { assert(P + sizeof(T) <= E && "out of bounds read!"); - return support::endian::read(P); + return support::endian::read(P); } public: diff --git a/llvm/include/llvm/Object/GOFF.h b/llvm/include/llvm/Object/GOFF.h index 31f2f82fffd6a..91762457ae056 100644 --- a/llvm/include/llvm/Object/GOFF.h +++ b/llvm/include/llvm/Object/GOFF.h @@ -69,7 +69,7 @@ class Record { static void get(const uint8_t *Bytes, uint8_t ByteIndex, T &Value) { assert(ByteIndex + sizeof(T) <= GOFF::RecordLength && "Byte index out of bounds!"); - Value = support::endian::read(&Bytes[ByteIndex]); + Value = support::endian::read(&Bytes[ByteIndex]); } }; diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 5b71c984b5d5c..172b4c9f61875 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -416,9 +416,9 @@ class RawInstrProfReader : public InstrProfReader { if (!ShouldSwapBytes) return llvm::endianness::native; if (llvm::endianness::native == llvm::endianness::little) - return support::big; + return llvm::endianness::big; else - return support::little; + return llvm::endianness::little; } inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { @@ -477,7 +477,7 @@ class InstrProfLookupTrait { // Endianness of the input value profile data. // It should be LE by default, but can be changed // for testing purpose. 
- llvm::endianness ValueProfDataEndianness = support::little; + llvm::endianness ValueProfDataEndianness = llvm::endianness::little; public: InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) diff --git a/llvm/lib/DebugInfo/BTF/BTFParser.cpp b/llvm/lib/DebugInfo/BTF/BTFParser.cpp index d1ed5d097e146..4bc0c94340b4e 100644 --- a/llvm/lib/DebugInfo/BTF/BTFParser.cpp +++ b/llvm/lib/DebugInfo/BTF/BTFParser.cpp @@ -203,13 +203,12 @@ const BTF::CommonType VoidTypeInst = {0, BTF::BTF_KIND_UNKN << 24, {0}}; // `BTFParser::Types` vector and the process stops. Error BTFParser::parseTypesInfo(ParseContext &Ctx, uint64_t TypesInfoStart, StringRef RawData) { - using support::big; - using support::little; using support::endian::byte_swap; TypesBuffer = OwningArrayRef(arrayRefFromStringRef(RawData)); // Switch endianness if necessary. - endianness Endianness = Ctx.Obj.isLittleEndian() ? little : big; + endianness Endianness = Ctx.Obj.isLittleEndian() ? llvm::endianness::little + : llvm::endianness::big; uint32_t *TypesBuffer32 = (uint32_t *)TypesBuffer.data(); for (uint64_t I = 0; I < TypesBuffer.size() / 4; ++I) TypesBuffer32[I] = byte_swap(TypesBuffer32[I], Endianness); diff --git a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp index 460f95d96a29e..2343386e031c5 100644 --- a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp +++ b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -69,13 +69,13 @@ void LazyRandomTypeCollection::reset(BinaryStreamReader &Reader, } void LazyRandomTypeCollection::reset(StringRef Data, uint32_t RecordCountHint) { - BinaryStreamReader Reader(Data, support::little); + BinaryStreamReader Reader(Data, llvm::endianness::little); reset(Reader, RecordCountHint); } void LazyRandomTypeCollection::reset(ArrayRef Data, uint32_t RecordCountHint) { - BinaryStreamReader Reader(Data, support::little); + BinaryStreamReader Reader(Data, 
llvm::endianness::little); reset(Reader, RecordCountHint); } diff --git a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp index cf0c877fdbf8b..25725853fb397 100644 --- a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp +++ b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp @@ -34,7 +34,7 @@ SimpleTypeSerializer::~SimpleTypeSerializer() = default; template ArrayRef SimpleTypeSerializer::serialize(T &Record) { - BinaryStreamWriter Writer(ScratchBuffer, support::little); + BinaryStreamWriter Writer(ScratchBuffer, llvm::endianness::little); TypeRecordMapping Mapping(Writer); // Write the record prefix first with a dummy length but real kind. diff --git a/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp index 5fb8d497b9573..e52f3e56f1155 100644 --- a/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp +++ b/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp @@ -20,8 +20,8 @@ using namespace llvm::codeview; SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator, CodeViewContainer Container) - : Storage(Allocator), Stream(RecordBuffer, support::little), Writer(Stream), - Mapping(Writer, Container) {} + : Storage(Allocator), Stream(RecordBuffer, llvm::endianness::little), + Writer(Stream), Mapping(Writer, Container) {} Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) { assert(!CurrentSymbol && "Already in a symbol mapping!"); diff --git a/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp index e903a37a8c8e0..59e2a85c4d4c0 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -470,7 +470,7 @@ static void resolveTypeIndexReferences(ArrayRef RecordData, RecordData = RecordData.drop_front(sizeof(RecordPrefix)); - BinaryStreamReader Reader(RecordData, support::little); + BinaryStreamReader Reader(RecordData, 
llvm::endianness::little); for (const auto &Ref : Refs) { Reader.setOffset(Ref.Offset); FixedStreamArray Run; diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp index 7e6eec71d1ad2..1fe90ef579a3d 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -74,7 +74,8 @@ GsymReader::parse() { break; case GSYM_CIGAM: // This is a GSYM file, but not native endianness. - Endian = sys::IsBigEndianHost ? support::little : support::big; + Endian = sys::IsBigEndianHost ? llvm::endianness::little + : llvm::endianness::big; Swap.reset(new SwappedData); break; default: @@ -82,7 +83,7 @@ GsymReader::parse() { "not a GSYM file"); } - bool DataIsLittleEndian = HostByteOrder != support::little; + bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little; // Read a correctly byte swapped header if we need to. if (Swap) { DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); @@ -259,10 +260,11 @@ llvm::Expected GsymReader::getFunctionInfo(uint64_t Addr) const { // Address info offsets size should have been checked in parse(). assert(*AddressIndex < AddrInfoOffsets.size()); auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex]; - assert((Endian == support::big || Endian == support::little) && - "Endian must be either big or little"); + assert( + (Endian == llvm::endianness::big || Endian == llvm::endianness::little) && + "Endian must be either big or little"); DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), - Endian == support::little, 4); + Endian == llvm::endianness::little, 4); if (std::optional OptAddr = getAddress(*AddressIndex)) { auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr); if (ExpectedFI) { @@ -284,10 +286,11 @@ llvm::Expected GsymReader::lookup(uint64_t Addr) const { // Address info offsets size should have been checked in parse(). 
assert(*AddressIndex < AddrInfoOffsets.size()); auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex]; - assert((Endian == support::big || Endian == support::little) && - "Endian must be either big or little"); + assert( + (Endian == llvm::endianness::big || Endian == llvm::endianness::little) && + "Endian must be either big or little"); DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), - Endian == support::little, 4); + Endian == llvm::endianness::little, 4); if (std::optional OptAddr = getAddress(*AddressIndex)) return FunctionInfo::lookup(Data, *this, *OptAddr, Addr); return createStringError(std::errc::invalid_argument, diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp index d14d81f3f76d3..d1789fe587f3a 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp @@ -349,7 +349,7 @@ Error LVCodeViewReader::initializeFileAndStringTables( if (Error E = Reader.readFixedString(Contents, SubSectionSize)) return createStringError(errorToErrorCode(std::move(E)), getFileName()); - BinaryStreamRef ST(Contents, support::little); + BinaryStreamRef ST(Contents, llvm::endianness::little); switch (DebugSubsectionKind(SubType)) { case DebugSubsectionKind::FileChecksums: if (Error E = CVFileChecksumTable.initialize(ST)) @@ -478,8 +478,8 @@ Error LVCodeViewReader::loadPrecompiledObject(PrecompRecord &Precomp, if (Magic != COFF::DEBUG_SECTION_MAGIC) return errorCodeToError(object_error::parse_failed); - ReaderPrecomp = - std::make_unique(*DataOrErr, support::little); + ReaderPrecomp = std::make_unique( + *DataOrErr, llvm::endianness::little); cantFail( ReaderPrecomp->readArray(CVTypesPrecomp, ReaderPrecomp->getLength())); @@ -550,7 +550,7 @@ Error LVCodeViewReader::traverseTypeSection(StringRef SectionName, // Get the first type record. 
It will indicate if this object uses a type // server (/Zi) or a PCH file (/Yu). CVTypeArray CVTypes; - BinaryStreamReader Reader(*DataOrErr, support::little); + BinaryStreamReader Reader(*DataOrErr, llvm::endianness::little); cantFail(Reader.readArray(CVTypes, Reader.getLength())); CVTypeArray::Iterator FirstType = CVTypes.begin(); @@ -664,7 +664,7 @@ Error LVCodeViewReader::traverseSymbolSection(StringRef SectionName, if (Magic != COFF::DEBUG_SECTION_MAGIC) return createStringError(object_error::parse_failed, getFileName()); - BinaryStreamReader FSReader(Data, support::little); + BinaryStreamReader FSReader(Data, llvm::endianness::little); if (Error Err = initializeFileAndStringTables(FSReader)) return Err; @@ -752,7 +752,8 @@ Error LVCodeViewReader::traverseSymbolSection(StringRef SectionName, W.printString("Symbol Name", SymbolName); }); - BinaryStreamReader Reader(FunctionLineTables[SymbolName], support::little); + BinaryStreamReader Reader(FunctionLineTables[SymbolName], + llvm::endianness::little); DebugLinesSubsectionRef Lines; if (Error E = Lines.initialize(Reader)) diff --git a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp index 85c22483fa90e..cddee3e1c273f 100644 --- a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp @@ -107,7 +107,7 @@ static inline bool isCodeViewDebugSubsection(object::SectionRef Section, return false; } - Reader = BinaryStreamReader(*ContentsOrErr, support::little); + Reader = BinaryStreamReader(*ContentsOrErr, llvm::endianness::little); uint32_t Magic; if (Reader.bytesRemaining() < sizeof(uint32_t)) return false; diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp index 0496847a0c26c..1fd2a33d3f11f 100644 --- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp @@ -45,7 +45,8 @@ 
COFFLinkGraphBuilder::getPointerSize(const object::COFFObjectFile &Obj) { llvm::endianness COFFLinkGraphBuilder::getEndianness(const object::COFFObjectFile &Obj) { - return Obj.isLittleEndian() ? support::little : support::big; + return Obj.isLittleEndian() ? llvm::endianness::little + : llvm::endianness::big; } uint64_t COFFLinkGraphBuilder::getSectionSize(const object::COFFObjectFile &Obj, diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp index 525ece4eea9c0..23946c7de9adb 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp @@ -265,7 +265,7 @@ createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) { case Triple::arm: case Triple::thumb: { auto &ELFFile = cast>(**ELFObj).getELFFile(); - return ELFLinkGraphBuilder_aarch32( + return ELFLinkGraphBuilder_aarch32( (*ELFObj)->getFileName(), ELFFile, TT, std::move(*Features), ArmCfg) .buildGraph(); @@ -273,7 +273,7 @@ createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) { case Triple::armeb: case Triple::thumbeb: { auto &ELFFile = cast>(**ELFObj).getELFFile(); - return ELFLinkGraphBuilder_aarch32( + return ELFLinkGraphBuilder_aarch32( (*ELFObj)->getFileName(), ELFFile, TT, std::move(*Features), ArmCfg) .buildGraph(); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp index a095059496dc1..bf1d22ac9a430 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp @@ -506,26 +506,26 @@ void link_ELF_ppc64(std::unique_ptr G, Expected> createLinkGraphFromELFObject_ppc64(MemoryBufferRef ObjectBuffer) { - return createLinkGraphFromELFObject_ppc64( + return createLinkGraphFromELFObject_ppc64( std::move(ObjectBuffer)); } Expected> createLinkGraphFromELFObject_ppc64le(MemoryBufferRef ObjectBuffer) { - return createLinkGraphFromELFObject_ppc64( + return 
createLinkGraphFromELFObject_ppc64( std::move(ObjectBuffer)); } /// jit-link the given object buffer, which must be a ELF ppc64 object file. void link_ELF_ppc64(std::unique_ptr G, std::unique_ptr Ctx) { - return link_ELF_ppc64(std::move(G), std::move(Ctx)); + return link_ELF_ppc64(std::move(G), std::move(Ctx)); } /// jit-link the given object buffer, which must be a ELF ppc64le object file. void link_ELF_ppc64le(std::unique_ptr G, std::unique_ptr Ctx) { - return link_ELF_ppc64(std::move(G), std::move(Ctx)); + return link_ELF_ppc64(std::move(G), std::move(Ctx)); } } // end namespace llvm::jitlink diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 57e17aa78fed9..474a0b5160bcb 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -155,8 +155,8 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, "__---.finalize", "__R--.finalize", "__-W-.finalize", "__RW-.finalize", "__--X.finalize", "__R-X.finalize", "__-WX.finalize", "__RWX.finalize"}; - auto G = - std::make_unique("", Triple(), 0, support::native, nullptr); + auto G = std::make_unique("", Triple(), 0, + llvm::endianness::native, nullptr); orc::AllocGroupSmallMap ContentBlocks; orc::ExecutorAddr NextAddr(0x100000); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp index 8afedd016f9a1..bcbc429cae127 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp @@ -108,7 +108,8 @@ MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) { llvm::endianness MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) { - return Obj.isLittleEndian() ? support::little : support::big; + return Obj.isLittleEndian() ? 
llvm::endianness::little + : llvm::endianness::big; } Section &MachOLinkGraphBuilder::getCommonSection() { diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp index febd2e73aa176..b541db3672f4e 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp @@ -113,8 +113,9 @@ llvm::orc::createDWARFContext(LinkGraph &G) { std::make_unique(std::move(SecData)); } } - auto Ctx = DWARFContext::create(DWARFSectionData, G.getPointerSize(), - G.getEndianness() == support::little); + auto Ctx = + DWARFContext::create(DWARFSectionData, G.getPointerSize(), + G.getEndianness() == llvm::endianness::little); dumpDWARFContext(*Ctx); return std::make_pair(std::move(Ctx), std::move(DWARFSectionData)); } diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp index 6959b068aa6e9..cdc1158ce1c4c 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp @@ -374,7 +374,7 @@ void GDBJITDebugInfoRegistrationPlugin::modifyPassConfigForMachO( case Triple::aarch64: // Supported, continue. assert(LG.getPointerSize() == 8 && "Graph has incorrect pointer size"); - assert(LG.getEndianness() == support::little && + assert(LG.getEndianness() == llvm::endianness::little && "Graph has incorrect endianness"); break; default: @@ -384,7 +384,7 @@ void GDBJITDebugInfoRegistrationPlugin::modifyPassConfigForMachO( << "MachO graph " << LG.getName() << "(triple = " << LG.getTargetTriple().str() << ", pointer size = " << LG.getPointerSize() << ", endianness = " - << (LG.getEndianness() == support::big ? "big" : "little") + << (LG.getEndianness() == llvm::endianness::big ? 
"big" : "little") << ")\n"; }); return; diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index d439b1b4ebfbf..9fdabf310d6ec 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -35,7 +35,8 @@ static void or32AArch64Imm(void *L, uint64_t Imm) { } template static void write(bool isBE, void *P, T V) { - isBE ? write(P, V) : write(P, V); + isBE ? write(P, V) + : write(P, V); } static void write32AArch64Addr(void *L, uint64_t Imm) { diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 1ae3ac179787f..73e2b365f109a 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -318,18 +318,24 @@ class RuntimeDyldImpl { std::string ErrorStr; void writeInt16BE(uint8_t *Addr, uint16_t Value) { - llvm::support::endian::write( - Addr, Value, IsTargetLittleEndian ? support::little : support::big); + llvm::support::endian::write(Addr, Value, + IsTargetLittleEndian + ? llvm::endianness::little + : llvm::endianness::big); } void writeInt32BE(uint8_t *Addr, uint32_t Value) { - llvm::support::endian::write( - Addr, Value, IsTargetLittleEndian ? support::little : support::big); + llvm::support::endian::write(Addr, Value, + IsTargetLittleEndian + ? llvm::endianness::little + : llvm::endianness::big); } void writeInt64BE(uint8_t *Addr, uint64_t Value) { - llvm::support::endian::write( - Addr, Value, IsTargetLittleEndian ? support::little : support::big); + llvm::support::endian::write(Addr, Value, + IsTargetLittleEndian + ? 
llvm::endianness::little + : llvm::endianness::big); } virtual void setMipsABI(const ObjectFile &Obj) { diff --git a/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp index 49ed27e265d40..c1256563d0d62 100644 --- a/llvm/lib/InterfaceStub/ELFObjHandler.cpp +++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp @@ -57,7 +57,7 @@ static void initELFHeader(typename ELFT::Ehdr &ElfHeader, uint16_t Machine) { ElfHeader.e_ident[EI_MAG2] = ElfMagic[EI_MAG2]; ElfHeader.e_ident[EI_MAG3] = ElfMagic[EI_MAG3]; ElfHeader.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32; - bool IsLittleEndian = ELFT::TargetEndianness == support::little; + bool IsLittleEndian = ELFT::TargetEndianness == llvm::endianness::little; ElfHeader.e_ident[EI_DATA] = IsLittleEndian ? ELFDATA2LSB : ELFDATA2MSB; ElfHeader.e_ident[EI_VERSION] = EV_CURRENT; ElfHeader.e_ident[EI_OSABI] = ELFOSABI_NONE; diff --git a/llvm/lib/MC/DXContainerPSVInfo.cpp b/llvm/lib/MC/DXContainerPSVInfo.cpp index bdc6f79a68c0c..48182fcd31df0 100644 --- a/llvm/lib/MC/DXContainerPSVInfo.cpp +++ b/llvm/lib/MC/DXContainerPSVInfo.cpp @@ -87,15 +87,15 @@ void PSVRuntimeInfo::write(raw_ostream &OS, uint32_t Version) const { } // Write the size of the info. - support::endian::write(OS, InfoSize, support::little); + support::endian::write(OS, InfoSize, llvm::endianness::little); // Write the info itself. 
OS.write(reinterpret_cast(&BaseData), InfoSize); uint32_t ResourceCount = static_cast(Resources.size()); - support::endian::write(OS, ResourceCount, support::little); + support::endian::write(OS, ResourceCount, llvm::endianness::little); if (ResourceCount > 0) - support::endian::write(OS, BindingSize, support::little); + support::endian::write(OS, BindingSize, llvm::endianness::little); for (const auto &Res : Resources) OS.write(reinterpret_cast(&Res), BindingSize); @@ -126,22 +126,22 @@ void PSVRuntimeInfo::write(raw_ostream &OS, uint32_t Version) const { } support::endian::write(OS, static_cast(StrTabBuilder.getSize()), - support::little); + llvm::endianness::little); // Write the string table. StrTabBuilder.write(OS); // Write the index table size, then table. support::endian::write(OS, static_cast(IndexBuffer.size()), - support::little); + llvm::endianness::little); for (auto I : IndexBuffer) - support::endian::write(OS, I, support::little); + support::endian::write(OS, I, llvm::endianness::little); if (SignatureElements.size() > 0) { // write the size of the signature elements. support::endian::write(OS, static_cast(sizeof(v0::SignatureElement)), - support::little); + llvm::endianness::little); // write the signature elements. 
OS.write(reinterpret_cast(&SignatureElements[0]), @@ -150,16 +150,16 @@ void PSVRuntimeInfo::write(raw_ostream &OS, uint32_t Version) const { for (const auto &MaskVector : OutputVectorMasks) support::endian::write_array(OS, ArrayRef(MaskVector), - support::little); + llvm::endianness::little); support::endian::write_array(OS, ArrayRef(PatchOrPrimMasks), - support::little); + llvm::endianness::little); for (const auto &MaskVector : InputOutputMap) support::endian::write_array(OS, ArrayRef(MaskVector), - support::little); + llvm::endianness::little); support::endian::write_array(OS, ArrayRef(InputPatchMap), - support::little); + llvm::endianness::little); support::endian::write_array(OS, ArrayRef(PatchOutputMap), - support::little); + llvm::endianness::little); } void Signature::write(raw_ostream &OS) { diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 816aa21321095..8490fefe7ff53 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -152,8 +152,9 @@ struct ELFWriter { public: ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS, bool IsLittleEndian, DwoMode Mode) - : OWriter(OWriter), - W(OS, IsLittleEndian ? support::little : support::big), Mode(Mode) {} + : OWriter(OWriter), W(OS, IsLittleEndian ? llvm::endianness::little + : llvm::endianness::big), + Mode(Mode) {} void WriteWord(uint64_t Word) { if (is64Bit()) @@ -406,8 +407,8 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) { W.OS << char(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS] // e_ident[EI_DATA] - W.OS << char(W.Endian == support::little ? ELF::ELFDATA2LSB - : ELF::ELFDATA2MSB); + W.OS << char(W.Endian == llvm::endianness::little ? 
ELF::ELFDATA2LSB + : ELF::ELFDATA2MSB); W.OS << char(ELF::EV_CURRENT); // e_ident[EI_VERSION] // e_ident[EI_OSABI] diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp index 33244cbf88d91..addeb6db95969 100644 --- a/llvm/lib/MC/GOFFObjectWriter.cpp +++ b/llvm/lib/MC/GOFFObjectWriter.cpp @@ -137,7 +137,8 @@ class GOFFOstream : public raw_ostream { // Support for endian-specific data. template void writebe(value_type Value) { - Value = support::endian::byte_swap(Value, support::big); + Value = + support::endian::byte_swap(Value, llvm::endianness::big); write(reinterpret_cast(&Value), sizeof(value_type)); } }; diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp index 0e6fee8643df5..4b1064a07e83c 100644 --- a/llvm/lib/MC/MCAsmBackend.cpp +++ b/llvm/lib/MC/MCAsmBackend.cpp @@ -33,11 +33,11 @@ MCAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { auto TW = createObjectTargetWriter(); switch (TW->getFormat()) { case Triple::ELF: - return createELFObjectWriter(cast(std::move(TW)), OS, - Endian == support::little); + return createELFObjectWriter(cast(std::move(TW)), + OS, Endian == llvm::endianness::little); case Triple::MachO: return createMachObjectWriter(cast(std::move(TW)), - OS, Endian == support::little); + OS, Endian == llvm::endianness::little); case Triple::COFF: return createWinCOFFObjectWriter( cast(std::move(TW)), OS); @@ -72,7 +72,7 @@ MCAsmBackend::createDwoObjectWriter(raw_pwrite_stream &OS, case Triple::ELF: return createELFDwoObjectWriter( cast(std::move(TW)), OS, DwoOS, - Endian == support::little); + Endian == llvm::endianness::little); case Triple::Wasm: return createWasmDwoObjectWriter( cast(std::move(TW)), OS, DwoOS); diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index c71f0250a31d7..55558820b670d 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -606,7 +606,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm, // Duplicate 
V into Data as byte vector to reduce number of // writes done. As such, do endian conversion here. for (unsigned I = 0; I != VSize; ++I) { - unsigned index = Endian == support::little ? I : (VSize - I - 1); + unsigned index = Endian == llvm::endianness::little ? I : (VSize - I - 1); Data[I] = uint8_t(V >> (index * 8)); } for (unsigned I = VSize; I < MaxChunkSize; ++I) diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp index f09997ebdf10a..d234ce110918e 100644 --- a/llvm/lib/MC/MCCodeView.cpp +++ b/llvm/lib/MC/MCCodeView.cpp @@ -654,7 +654,7 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout, } unsigned NumGaps = J - I - 1; - support::endian::Writer LEWriter(OS, support::little); + support::endian::Writer LEWriter(OS, llvm::endianness::little); unsigned Bias = 0; // We must split the range into chunks of MaxDefRange, this is a fundamental diff --git a/llvm/lib/MC/MCDXContainerWriter.cpp b/llvm/lib/MC/MCDXContainerWriter.cpp index 028bfe6e79a12..0580dc7e42826 100644 --- a/llvm/lib/MC/MCDXContainerWriter.cpp +++ b/llvm/lib/MC/MCDXContainerWriter.cpp @@ -30,7 +30,7 @@ class DXContainerObjectWriter : public MCObjectWriter { public: DXContainerObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) - : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {} + : W(OS, llvm::endianness::little), TargetObjectWriter(std::move(MOTW)) {} ~DXContainerObjectWriter() override {} diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index 189fe2b238425..7925fba876f86 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -1940,8 +1940,9 @@ void MCDwarfFrameEmitter::encodeAdvanceLoc(MCContext &Context, if (AddrDelta == 0) return; - llvm::endianness E = - Context.getAsmInfo()->isLittleEndian() ? support::little : support::big; + llvm::endianness E = Context.getAsmInfo()->isLittleEndian() + ? 
llvm::endianness::little + : llvm::endianness::big; if (isUIntN(6, AddrDelta)) { uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta; diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 7f9c0c3b0b8df..2371cb2384414 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -137,7 +137,7 @@ void MCStreamer::emitIntValue(uint64_t Value, unsigned Size) { "Invalid size"); const bool IsLittleEndian = Context.getAsmInfo()->isLittleEndian(); uint64_t Swapped = support::endian::byte_swap( - Value, IsLittleEndian ? support::little : support::big); + Value, IsLittleEndian ? llvm::endianness::little : llvm::endianness::big); unsigned Index = IsLittleEndian ? 0 : 8 - Size; emitBytes(StringRef(reinterpret_cast(&Swapped) + Index, Size)); } diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index 04097dfe2e9f7..d17e6e125d872 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -630,7 +630,7 @@ void MachObjectWriter::computeSymbolTable( // Set the Index and the IsExtern bit. 
unsigned Index = Rel.Sym->getIndex(); assert(isInt<24>(Index)); - if (W.Endian == support::little) + if (W.Endian == llvm::endianness::little) Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27); else Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4); diff --git a/llvm/lib/MC/SPIRVObjectWriter.cpp b/llvm/lib/MC/SPIRVObjectWriter.cpp index cb49f5eeca8d9..39856e96e9be5 100644 --- a/llvm/lib/MC/SPIRVObjectWriter.cpp +++ b/llvm/lib/MC/SPIRVObjectWriter.cpp @@ -24,7 +24,7 @@ class SPIRVObjectWriter : public MCObjectWriter { public: SPIRVObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) - : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {} + : W(OS, llvm::endianness::little), TargetObjectWriter(std::move(MOTW)) {} ~SPIRVObjectWriter() override {} diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index bb8c68410a6b5..b99df3837cc21 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -1438,12 +1438,12 @@ void WasmObjectWriter::prepareImports( uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { - support::endian::Writer MainWriter(*OS, support::little); + support::endian::Writer MainWriter(*OS, llvm::endianness::little); W = &MainWriter; if (IsSplitDwarf) { uint64_t TotalSize = writeOneObject(Asm, Layout, DwoMode::NonDwoOnly); assert(DwoOS); - support::endian::Writer DwoWriter(*DwoOS, support::little); + support::endian::Writer DwoWriter(*DwoOS, llvm::endianness::little); W = &DwoWriter; return TotalSize + writeOneObject(Asm, Layout, DwoMode::DwoOnly); } else { diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index fd8af1f8cdb8b..1f73cb9884e0a 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -260,7 +260,7 @@ void COFFSymbol::set_name_offset(uint32_t Offset) { WinCOFFWriter::WinCOFFWriter(WinCOFFObjectWriter &OWriter, raw_pwrite_stream &OS, 
DwoMode Mode) - : OWriter(OWriter), W(OS, support::little), Mode(Mode) { + : OWriter(OWriter), W(OS, llvm::endianness::little), Mode(Mode) { Header.Machine = OWriter.TargetObjectWriter->getMachine(); // Some relocations on ARM64 (the 21 bit ADRP relocations) have a slightly // limited range for the immediate offset (+/- 1 MB); create extra offset diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index f1cce9b8c94d3..343e2fc877bc3 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -448,7 +448,7 @@ class XCOFFObjectWriter : public MCObjectWriter { XCOFFObjectWriter::XCOFFObjectWriter( std::unique_ptr MOTW, raw_pwrite_stream &OS) - : W(OS, support::big), TargetObjectWriter(std::move(MOTW)), + : W(OS, llvm::endianness::big), TargetObjectWriter(std::move(MOTW)), Strings(StringTableBuilder::XCOFF), Text(".text", XCOFF::STYP_TEXT, /* IsVirtual */ false, CsectGroups{&ProgramCodeCsects, &ReadOnlyCsects}), diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp index eaeef11b127e4..0a54d3798d8bf 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp @@ -1987,8 +1987,9 @@ template void ELFWriter::writeEhdr() { Ehdr.e_ident[EI_MAG2] = 'L'; Ehdr.e_ident[EI_MAG3] = 'F'; Ehdr.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32; - Ehdr.e_ident[EI_DATA] = - ELFT::TargetEndianness == support::big ? ELFDATA2MSB : ELFDATA2LSB; + Ehdr.e_ident[EI_DATA] = ELFT::TargetEndianness == llvm::endianness::big + ? 
ELFDATA2MSB + : ELFDATA2LSB; Ehdr.e_ident[EI_VERSION] = EV_CURRENT; Ehdr.e_ident[EI_OSABI] = Obj.OSABI; Ehdr.e_ident[EI_ABIVERSION] = Obj.ABIVersion; @@ -2695,11 +2696,11 @@ uint64_t IHexWriter::writeEntryPointRecord(uint8_t *Buf) { if (Obj.Entry <= 0xFFFFFU) { Data[0] = ((Obj.Entry & 0xF0000U) >> 12) & 0xFF; support::endian::write(&Data[2], static_cast(Obj.Entry), - support::big); + llvm::endianness::big); HexData = IHexRecord::getLine(IHexRecord::StartAddr80x86, 0, Data); } else { support::endian::write(Data, static_cast(Obj.Entry), - support::big); + llvm::endianness::big); HexData = IHexRecord::getLine(IHexRecord::StartAddr, 0, Data); } memcpy(Buf, HexData.data(), HexData.size()); diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp index bf9a1ce1de88b..fdd87824e2293 100644 --- a/llvm/lib/Object/Archive.cpp +++ b/llvm/lib/Object/Archive.cpp @@ -1421,7 +1421,7 @@ BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) // 64-bit global symbol tables, we need to merge them into a single table. raw_string_ostream Out(MergedGlobalSymtabBuf); uint64_t SymNum = SymtabInfos[0].SymNum + SymtabInfos[1].SymNum; - write(Out, SymNum, support::big); + write(Out, SymNum, llvm::endianness::big); // Merge symbol offset. Out << SymtabInfos[0].SymbolOffsetTable; Out << SymtabInfos[1].SymbolOffsetTable; diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index a67c657b48ba0..2f70c9edd13ed 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -203,11 +203,12 @@ static bool isBSDLike(object::Archive::Kind Kind) { template static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { support::endian::write(Out, Val, - isBSDLike(Kind) ? support::little : support::big); + isBSDLike(Kind) ? 
llvm::endianness::little + : llvm::endianness::big); } template static void printLE(raw_ostream &Out, T Val) { - support::endian::write(Out, Val, support::little); + support::endian::write(Out, Val, llvm::endianness::little); } static void printRestOfMemberHeader( diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp index 08eb0d034c53a..574f7a7cf1f45 100644 --- a/llvm/lib/Object/COFFObjectFile.cpp +++ b/llvm/lib/Object/COFFObjectFile.cpp @@ -1907,7 +1907,7 @@ Error ResourceSectionRef::load(const COFFObjectFile *O, const SectionRef &S) { Expected Contents = Section.getContents(); if (!Contents) return Contents.takeError(); - BBS = BinaryByteStream(*Contents, support::little); + BBS = BinaryByteStream(*Contents, llvm::endianness::little); const coff_section *COFFSect = Obj->getCOFFSection(Section); ArrayRef OrigRelocs = Obj->getRelocations(COFFSect); Relocs.reserve(OrigRelocs.size()); diff --git a/llvm/lib/Object/WindowsResource.cpp b/llvm/lib/Object/WindowsResource.cpp index 0764dc8f75233..61ca49e290da1 100644 --- a/llvm/lib/Object/WindowsResource.cpp +++ b/llvm/lib/Object/WindowsResource.cpp @@ -50,7 +50,7 @@ WindowsResource::WindowsResource(MemoryBufferRef Source) : Binary(Binary::ID_WinRes, Source) { size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE; BBS = BinaryByteStream(Data.getBuffer().drop_front(LeadingSize), - support::little); + llvm::endianness::little); } // static diff --git a/llvm/lib/ObjectYAML/COFFEmitter.cpp b/llvm/lib/ObjectYAML/COFFEmitter.cpp index 015c293fba46f..7088223b9b672 100644 --- a/llvm/lib/ObjectYAML/COFFEmitter.cpp +++ b/llvm/lib/ObjectYAML/COFFEmitter.cpp @@ -182,7 +182,7 @@ toDebugS(ArrayRef Subsections, } uint8_t *Buffer = Allocator.Allocate(Size); MutableArrayRef Output(Buffer, Size); - BinaryStreamWriter Writer(Output, support::little); + BinaryStreamWriter Writer(Output, llvm::endianness::little); Err(Writer.writeInteger(COFF::DEBUG_SECTION_MAGIC)); for (const auto &B : Builders) { 
@@ -314,7 +314,8 @@ template raw_ostream &operator<<(raw_ostream &OS, const binary_le_impl &BLE) { char Buffer[sizeof(BLE.Value)]; - support::endian::write(Buffer, BLE.Value); + support::endian::write(Buffer, + BLE.Value); OS.write(Buffer, sizeof(BLE.Value)); return OS; } diff --git a/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp index 02f053bb0e0f6..662eb63f835d6 100644 --- a/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp +++ b/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp @@ -900,7 +900,7 @@ YAMLDebugSubsection::fromCodeViewSubection(const StringsAndChecksumsRef &SC, std::vector llvm::CodeViewYAML::fromDebugS(ArrayRef Data, const StringsAndChecksumsRef &SC) { - BinaryStreamReader Reader(Data, support::little); + BinaryStreamReader Reader(Data, llvm::endianness::little); uint32_t Magic; ExitOnError Err("Invalid .debug$S section!"); diff --git a/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp index e4e2b2a6d21a6..99689786a13cc 100644 --- a/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp +++ b/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp @@ -784,7 +784,7 @@ std::vector llvm::CodeViewYAML::fromDebugT(ArrayRef DebugTorP, StringRef SectionName) { ExitOnError Err("Invalid " + std::string(SectionName) + " section!"); - BinaryStreamReader Reader(DebugTorP, support::little); + BinaryStreamReader Reader(DebugTorP, llvm::endianness::little); CVTypeArray Types; uint32_t Magic; @@ -813,7 +813,7 @@ ArrayRef llvm::CodeViewYAML::toDebugT(ArrayRef Leafs, } uint8_t *ResultBuffer = Alloc.Allocate(Size); MutableArrayRef Output(ResultBuffer, Size); - BinaryStreamWriter Writer(Output, support::little); + BinaryStreamWriter Writer(Output, llvm::endianness::little); ExitOnError Err("Error writing type record to " + std::string(SectionName) + " section"); Err(Writer.writeInteger(COFF::DEBUG_SECTION_MAGIC)); diff --git a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp 
index 7ad878f04c883..ccf768c06aebf 100644 --- a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp +++ b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp @@ -33,7 +33,7 @@ constexpr uint32_t MaxRawDataSize = UINT32_MAX; class XCOFFWriter { public: XCOFFWriter(XCOFFYAML::Object &Obj, raw_ostream &OS, yaml::ErrorHandler EH) - : Obj(Obj), W(OS, support::big), ErrHandler(EH), + : Obj(Obj), W(OS, llvm::endianness::big), ErrHandler(EH), StrTblBuilder(StringTableBuilder::XCOFF) { Is64Bit = Obj.Header.Magic == (llvm::yaml::Hex16)XCOFF::XCOFF64; } diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index efcc245130561..2873e06266e44 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -49,9 +49,9 @@ namespace llvm { class ProfOStream { public: ProfOStream(raw_fd_ostream &FD) - : IsFDOStream(true), OS(FD), LE(FD, support::little) {} + : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {} ProfOStream(raw_string_ostream &STR) - : IsFDOStream(false), OS(STR), LE(STR, support::little) {} + : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {} uint64_t tell() { return OS.tell(); } void write(uint64_t V) { LE.write(V); } @@ -106,7 +106,7 @@ class InstrProfRecordWriterTrait { using hash_value_type = uint64_t; using offset_type = uint64_t; - llvm::endianness ValueProfDataEndianness = support::little; + llvm::endianness ValueProfDataEndianness = llvm::endianness::little; InstrProfSummaryBuilder *SummaryBuilder; InstrProfSummaryBuilder *CSSummaryBuilder; diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 74990238d3796..c11cd4dfa6a5a 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -353,7 +353,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() { // retrieve the name using the name index without having to read the // whole name table. 
encodeULEB128(NameTable.size(), OS); - support::endian::Writer Writer(OS, support::little); + support::endian::Writer Writer(OS, llvm::endianness::little); for (auto N : V) Writer.write(hashFuncName(N)); return sampleprof_error::success; @@ -394,7 +394,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeCSNameTableSection() { auto &OS = *OutputStream; encodeULEB128(OrderedContexts.size(), OS); - support::endian::Writer Writer(OS, support::little); + support::endian::Writer Writer(OS, llvm::endianness::little); for (auto Context : OrderedContexts) { auto Frames = Context.getContextFrames(); encodeULEB128(Frames.size(), OS); @@ -741,7 +741,7 @@ void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) { } void SampleProfileWriterExtBinaryBase::allocSecHdrTable() { - support::endian::Writer Writer(*OutputStream, support::little); + support::endian::Writer Writer(*OutputStream, llvm::endianness::little); Writer.write(static_cast(SectionHdrLayout.size())); SecHdrTableOffset = OutputStream->tell(); @@ -771,7 +771,8 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() { // but it needs to be read before SecLBRProfile (the order in // SectionHdrLayout). So we use IndexMap above to switch the order. 
support::endian::SeekableWriter Writer( - static_cast(*OutputStream), support::little); + static_cast(*OutputStream), + llvm::endianness::little); for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size(); LayoutIdx++) { assert(IndexMap[LayoutIdx] < SecHdrTable.size() && diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp index 8a7d00aa1dc08..947adbba10a21 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.cpp +++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp @@ -75,7 +75,7 @@ static Expected parseVersion(StringRef &Buf) { "Expecting version number."); uint64_t Version = - support::endian::read(Buf.data()); + support::endian::read(Buf.data()); if (Version != remarks::CurrentRemarkVersion) return createStringError(std::errc::illegal_byte_sequence, "Mismatching remark version. Got %" PRId64 @@ -90,7 +90,7 @@ static Expected parseStrTabSize(StringRef &Buf) { return createStringError(std::errc::illegal_byte_sequence, "Expecting string table size."); uint64_t StrTabSize = - support::endian::read(Buf.data()); + support::endian::read(Buf.data()); Buf = Buf.drop_front(sizeof(uint64_t)); return StrTabSize; } diff --git a/llvm/lib/Support/CodeGenCoverage.cpp b/llvm/lib/Support/CodeGenCoverage.cpp index d5ab77b9c66f4..0df45b4ff2ba7 100644 --- a/llvm/lib/Support/CodeGenCoverage.cpp +++ b/llvm/lib/Support/CodeGenCoverage.cpp @@ -58,7 +58,8 @@ bool CodeGenCoverage::parse(MemoryBuffer &Buffer, StringRef BackendName) { if (std::distance(CurPtr, Buffer.getBufferEnd()) < 8) return false; // Data is invalid. Not enough bytes for another rule id. - uint64_t RuleID = support::endian::read64(CurPtr, support::native); + uint64_t RuleID = + support::endian::read64(CurPtr, llvm::endianness::native); CurPtr += 8; // ~0ull terminates the rule id list. 
diff --git a/llvm/lib/Support/ELFAttributeParser.cpp b/llvm/lib/Support/ELFAttributeParser.cpp index 3deaab877b385..d3100c9ebb211 100644 --- a/llvm/lib/Support/ELFAttributeParser.cpp +++ b/llvm/lib/Support/ELFAttributeParser.cpp @@ -191,7 +191,7 @@ Error ELFAttributeParser::parseSubsection(uint32_t length) { Error ELFAttributeParser::parse(ArrayRef section, llvm::endianness endian) { unsigned sectionNumber = 0; - de = DataExtractor(section, endian == support::little, 0); + de = DataExtractor(section, endian == llvm::endianness::little, 0); // For early returns, we have more specific errors, consume the Error in // cursor. diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index be248125d8263..c7ff14c252f12 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -40,7 +40,8 @@ class AArch64AsmBackend : public MCAsmBackend { public: AArch64AsmBackend(const Target &T, const Triple &TT, bool IsLittleEndian) - : MCAsmBackend(IsLittleEndian ? support::little : support::big), + : MCAsmBackend(IsLittleEndian ? 
llvm::endianness::little + : llvm::endianness::big), TheTriple(TT) {} unsigned getNumFixupKinds() const override { @@ -360,7 +361,7 @@ AArch64AsmBackend::getFixupKind(StringRef Name) const { /// getFixupKindContainereSizeInBytes - The number of bytes of the /// container involved in big endian or 0 if the item is little endian unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) const { - if (Endian == support::little) + if (Endian == llvm::endianness::little) return 0; switch (Kind) { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 727f79909811d..dbc4323a860f5 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -685,7 +685,7 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, } uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); ++MCNumEmitted; // Keep track of the # of mi's emitted. 
} diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 65efb553d9d63..e18c04e623149 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -28,7 +28,7 @@ namespace { class AMDGPUAsmBackend : public MCAsmBackend { public: - AMDGPUAsmBackend(const Target &T) : MCAsmBackend(support::little) {} + AMDGPUAsmBackend(const Target &T) : MCAsmBackend(llvm::endianness::little) {} unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; }; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index bbbfbe4faa0fb..6c539df7677ee 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -142,11 +142,11 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, } void R600MCCodeEmitter::emit(uint32_t Value, SmallVectorImpl &CB) const { - support::endian::write(CB, Value, support::little); + support::endian::write(CB, Value, llvm::endianness::little); } void R600MCCodeEmitter::emit(uint64_t Value, SmallVectorImpl &CB) const { - support::endian::write(CB, Value, support::little); + support::endian::write(CB, Value, llvm::endianness::little); } unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const { diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 34ff0732c9fd0..9230ff7baedad 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -196,8 +196,9 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); - return (Endian == support::little ? 
InfosLE - : InfosBE)[Kind - FirstTargetFixupKind]; + return (Endian == llvm::endianness::little + ? InfosLE + : InfosBE)[Kind - FirstTargetFixupKind]; } void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) { @@ -493,7 +494,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, // inst{14-12} = Mid3; // inst{7-0} = Lo8; Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8); - return swapHalfWords(Value, Endian == support::little); + return swapHalfWords(Value, Endian == llvm::endianness::little); } case ARM::fixup_arm_thumb_upper_8_15: if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) @@ -532,7 +533,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, // Same addressing mode as fixup_arm_pcrel_10, // but with 16-bit halfwords swapped. if (Kind == ARM::fixup_t2_ldst_pcrel_12) - return swapHalfWords(Value, Endian == support::little); + return swapHalfWords(Value, Endian == llvm::endianness::little); return Value; } @@ -565,7 +566,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, out |= (Value & 0x700) << 4; out |= (Value & 0x0FF); - return swapHalfWords(out, Endian == support::little); + return swapHalfWords(out, Endian == llvm::endianness::little); } case ARM::fixup_arm_condbranch: @@ -602,7 +603,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, out |= (Value & 0x1FF800) << 5; // imm6 field out |= (Value & 0x0007FF); // imm11 field - return swapHalfWords(out, Endian == support::little); + return swapHalfWords(out, Endian == llvm::endianness::little); } case ARM::fixup_t2_condbranch: { Value = Value - 4; @@ -620,7 +621,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, out |= (Value & 0x1F800) << 5; // imm6 field out |= (Value & 0x007FF); // imm11 field - return swapHalfWords(out, Endian == support::little); + return swapHalfWords(out, Endian == llvm::endianness::little); } case ARM::fixup_arm_thumb_bl: { if (!isInt<25>(Value - 4) || @@ -656,7 +657,8 @@ 
unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | (uint16_t)imm11Bits); - return joinHalfWords(FirstHalf, SecondHalf, Endian == support::little); + return joinHalfWords(FirstHalf, SecondHalf, + Endian == llvm::endianness::little); } case ARM::fixup_arm_thumb_blx: { // The value doesn't encode the low two bits (always zero) and is offset by @@ -692,7 +694,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | ((uint16_t)imm10LBits) << 1); - return joinHalfWords(FirstHalf, SecondHalf, Endian == support::little); + return joinHalfWords(FirstHalf, SecondHalf, + Endian == llvm::endianness::little); } case ARM::fixup_thumb_adr_pcrel_10: case ARM::fixup_arm_thumb_cp: @@ -783,7 +786,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords // swapped. if (Kind == ARM::fixup_t2_pcrel_10) - return swapHalfWords(Value, Endian == support::little); + return swapHalfWords(Value, Endian == llvm::endianness::little); return Value; } @@ -814,7 +817,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, // Same addressing mode as fixup_arm_pcrel_9, but with 16-bit halfwords // swapped. 
if (Kind == ARM::fixup_t2_pcrel_9) - return swapHalfWords(Value, Endian == support::little); + return swapHalfWords(Value, Endian == llvm::endianness::little); return Value; } @@ -840,7 +843,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, EncValue |= (Value & 0x800) << 15; EncValue |= (Value & 0x700) << 4; EncValue |= (Value & 0xff); - return swapHalfWords(EncValue, Endian == support::little); + return swapHalfWords(EncValue, Endian == llvm::endianness::little); } case ARM::fixup_bf_branch: { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); @@ -849,7 +852,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, return 0; } uint32_t out = (((Value - 4) >> 1) & 0xf) << 23; - return swapHalfWords(out, Endian == support::little); + return swapHalfWords(out, Endian == llvm::endianness::little); } case ARM::fixup_bf_target: case ARM::fixup_bfl_target: @@ -865,7 +868,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, out |= (((Value - 4) >> 1) & 0x1) << 11; out |= (((Value - 4) >> 1) & 0x7fe); out |= (((Value - 4) >> 1) & HighBitMask) << 5; - return swapHalfWords(out, Endian == support::little); + return swapHalfWords(out, Endian == llvm::endianness::little); } case ARM::fixup_bfcsel_else_target: { // If this is a fixup of a branch future's else target then it should be a @@ -879,7 +882,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, return 0; } uint32_t out = ((Value >> 2) & 1) << 17; - return swapHalfWords(out, Endian == support::little); + return swapHalfWords(out, Endian == llvm::endianness::little); } case ARM::fixup_wls: case ARM::fixup_le: { @@ -894,7 +897,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, real_value = -real_value; out |= ((real_value >> 1) & 0x1) << 11; out |= ((real_value >> 1) & 0x7fe); - return swapHalfWords(out, Endian == support::little); + return swapHalfWords(out, Endian == llvm::endianness::little); } } } @@ -1089,7 +1092,7 
@@ void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, // Used to point to big endian bytes. unsigned FullSizeBytes; - if (Endian == support::big) { + if (Endian == llvm::endianness::big) { FullSizeBytes = getFixupKindContainerSizeBytes(Kind); assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!"); assert(NumBytes <= FullSizeBytes && "Invalid fixup size!"); @@ -1099,7 +1102,8 @@ void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, // the fixup value. The Value has been "split up" into the appropriate // bitfields above. for (unsigned i = 0; i != NumBytes; ++i) { - unsigned Idx = Endian == support::little ? i : (FullSizeBytes - 1 - i); + unsigned Idx = + Endian == llvm::endianness::little ? i : (FullSizeBytes - 1 - i); Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff); } } @@ -1348,12 +1352,12 @@ MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &Options) { - return createARMAsmBackend(T, STI, MRI, Options, support::little); + return createARMAsmBackend(T, STI, MRI, Options, llvm::endianness::little); } MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &Options) { - return createARMAsmBackend(T, STI, MRI, Options, support::big); + return createARMAsmBackend(T, STI, MRI, Options, llvm::endianness::big); } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index ace573c8fa96c..ac0c9b101cae1 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -22,7 +22,8 @@ class ARMAsmBackendDarwin : public ARMAsmBackend { const MachO::CPUSubTypeARM Subtype; ARMAsmBackendDarwin(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI) - : ARMAsmBackend(T, 
STI.getTargetTriple().isThumb(), support::little), + : ARMAsmBackend(T, STI.getTargetTriple().isThumb(), + llvm::endianness::little), MRI(MRI), TT(STI.getTargetTriple()), Subtype((MachO::CPUSubTypeARM)cantFail( MachO::getCPUSubType(STI.getTargetTriple()))) {} diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h index 6e447df9e4cb0..86ce6efe662a2 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h @@ -17,7 +17,7 @@ namespace { class ARMAsmBackendWinCOFF : public ARMAsmBackend { public: ARMAsmBackendWinCOFF(const Target &T, bool isThumb) - : ARMAsmBackend(T, isThumb, support::little) {} + : ARMAsmBackend(T, isThumb, llvm::endianness::little) {} std::unique_ptr createObjectTargetWriter() const override { return createARMWinCOFFObjectWriter(); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 616dd6dba7548..3f37acff292b4 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1899,7 +1899,8 @@ void ARMMCCodeEmitter::encodeInstruction(const MCInst &MI, else llvm_unreachable("Unexpected instruction size!"); - auto Endian = IsLittleEndian ? support::little : support::big; + auto Endian = + IsLittleEndian ? 
llvm::endianness::little : llvm::endianness::big; uint32_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); if (Size == 2) { support::endian::write(CB, Binary, Endian); diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h index d6a30e4dfa223..3081fe1fd58c0 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h @@ -29,7 +29,7 @@ struct MCFixupKindInfo; class AVRAsmBackend : public MCAsmBackend { public: AVRAsmBackend(Triple::OSType OSType) - : MCAsmBackend(support::little), OSType(OSType) {} + : MCAsmBackend(llvm::endianness::little), OSType(OSType) {} void adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, uint64_t &Value, MCContext *Ctx = nullptr) const; diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 9172a52e18e4c..fccc4ee9f74ac 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -97,7 +97,7 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, support::endian::write(&Data[Fixup.getOffset()], Value, Endian); } else if (Fixup.getKind() == FK_PCRel_4) { Value = (uint32_t)((Value - 8) / 8); - if (Endian == support::little) { + if (Endian == llvm::endianness::little) { Data[Fixup.getOffset() + 1] = 0x10; support::endian::write32le(&Data[Fixup.getOffset() + 4], Value); } else { @@ -131,12 +131,12 @@ MCAsmBackend *llvm::createBPFAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &) { - return new BPFAsmBackend(support::little); + return new BPFAsmBackend(llvm::endianness::little); } MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &) { - return new BPFAsmBackend(support::big); + return new BPFAsmBackend(llvm::endianness::big); } 
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index 420a2aad480a1..b807d6904004d 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -116,8 +116,8 @@ void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, const MCSubtargetInfo &STI) const { unsigned Opcode = MI.getOpcode(); raw_svector_ostream OS(CB); - support::endian::Writer OSE(OS, - IsLittleEndian ? support::little : support::big); + support::endian::Writer OSE(OS, IsLittleEndian ? llvm::endianness::little + : llvm::endianness::big); if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) { uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI); diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp index d53d2e9e00e92..76f5a5fc831f9 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp @@ -223,7 +223,7 @@ void CSKYAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. 
- bool IsLittleEndian = (Endian == support::little); + bool IsLittleEndian = (Endian == llvm::endianness::little); bool IsInstFixup = (Kind >= FirstTargetFixupKind); if (IsLittleEndian && IsInstFixup && (NumBytes == 4)) { diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h index 09b3ce6cc82be..8a2f743bdee63 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h @@ -20,7 +20,7 @@ class CSKYAsmBackend : public MCAsmBackend { public: CSKYAsmBackend(const MCSubtargetInfo &STI, const MCTargetOptions &OP) - : MCAsmBackend(support::little) {} + : MCAsmBackend(llvm::endianness::little) {} unsigned int getNumFixupKinds() const override { return CSKY::NumTargetFixupKinds; diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp index 4437c5c998ea6..fc5ddde07f62f 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp @@ -60,8 +60,9 @@ CSKYMCCodeEmitter::getImmOpValueMSBSize(const MCInst &MI, unsigned Idx, static void writeData(uint32_t Bin, unsigned Size, SmallVectorImpl &CB) { if (Size == 4) support::endian::write(CB, static_cast(Bin >> 16), - support::little); - support::endian::write(CB, static_cast(Bin), support::little); + llvm::endianness::little); + support::endian::write(CB, static_cast(Bin), + llvm::endianness::little); } void CSKYMCCodeEmitter::expandJBTF(const MCInst &MI, SmallVectorImpl &CB, diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp index 77007d853d95d..4a73cbbea3fcc 100644 --- a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp +++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp @@ -72,7 +72,8 @@ class DXILMCCodeEmitter : public MCCodeEmitter { class DXILAsmBackend : 
public MCAsmBackend { public: - DXILAsmBackend(const MCSubtargetInfo &STI) : MCAsmBackend(support::little) {} + DXILAsmBackend(const MCSubtargetInfo &STI) + : MCAsmBackend(llvm::endianness::little) {} ~DXILAsmBackend() override = default; void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 12c84ceb5fd2b..76b4dc4e5afa4 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -62,10 +62,9 @@ class HexagonAsmBackend : public MCAsmBackend { public: HexagonAsmBackend(const Target &T, const Triple &TT, uint8_t OSABI, StringRef CPU) - : MCAsmBackend(support::little), OSABI(OSABI), CPU(CPU), relaxedCnt(0), - MCII(T.createMCInstrInfo()), RelaxTarget(new MCInst *), - Extender(nullptr), MaxPacketSize(HexagonMCInstrInfo::packetSize(CPU)) - {} + : MCAsmBackend(llvm::endianness::little), OSABI(OSABI), CPU(CPU), + relaxedCnt(0), MCII(T.createMCInstrInfo()), RelaxTarget(new MCInst *), + Extender(nullptr), MaxPacketSize(HexagonMCInstrInfo::packetSize(CPU)) {} std::unique_ptr createObjectTargetWriter() const override { diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 8bf4d0a41298b..96ec81cd86abe 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -443,7 +443,7 @@ void HexagonMCCodeEmitter::encodeSingleInstruction( Binary |= SubBits0 | (SubBits1 << 16); } - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); ++MCNumEmitted; } diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp index 3c2a3ac69224d..08ca577a47852 100644 --- 
a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp @@ -46,7 +46,7 @@ class LanaiAsmBackend : public MCAsmBackend { public: LanaiAsmBackend(const Target &T, Triple::OSType OST) - : MCAsmBackend(support::big), OSType(OST) {} + : MCAsmBackend(llvm::endianness::big), OSType(OST) {} void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp index 5f9c2a100223e..d09966e3695cb 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp @@ -178,7 +178,7 @@ void LanaiMCCodeEmitter::encodeInstruction( unsigned Value = getBinaryCodeForInstr(Inst, Fixups, SubtargetInfo); ++MCNumEmitted; // Keep track of the number of emitted insns. - support::endian::write(CB, Value, support::big); + support::endian::write(CB, Value, llvm::endianness::big); } // Encode Lanai Memory Operand diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h index ae9bb8af04198..f840f9fa2b6a0 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h @@ -31,8 +31,8 @@ class LoongArchAsmBackend : public MCAsmBackend { public: LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, const MCTargetOptions &Options) - : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), - TargetOptions(Options) {} + : MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI), + Is64Bit(Is64Bit), TargetOptions(Options) {} ~LoongArchAsmBackend() override {} void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp 
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index 03fb9e008ae99..fbe817a2b5475 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -296,7 +296,7 @@ void LoongArchMCCodeEmitter::expandToVectorLDI( } MCInst TmpInst = MCInstBuilder(Opc).addOperand(MI.getOperand(0)).addImm(Imm); uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); } void LoongArchMCCodeEmitter::encodeInstruction( @@ -326,7 +326,7 @@ void LoongArchMCCodeEmitter::encodeInstruction( llvm_unreachable("Unhandled encodeInstruction length!"); case 4: { uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); - support::endian::write(CB, Bits, support::little); + support::endian::write(CB, Bits, llvm::endianness::little); break; } } diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp index b66557ec6c3a3..1b85e6df379cc 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp @@ -41,7 +41,7 @@ namespace { class M68kAsmBackend : public MCAsmBackend { public: - M68kAsmBackend(const Target &T) : MCAsmBackend(support::big) {} + M68kAsmBackend(const Target &T) : MCAsmBackend(llvm::endianness::big) {} unsigned getNumFixupKinds() const override { return 0; } diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h index e52b4961e3c84..1376b06bef6f6 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h @@ -82,11 +82,11 @@ template value_t swapWord(value_t Val) { const unsigned NumWords = sizeof(Val) / 2; if (NumWords <= 1) return Val; - Val = support::endian::byte_swap(Val, support::big); + Val = 
support::endian::byte_swap(Val, llvm::endianness::big); value_t NewVal = 0; for (unsigned i = 0U; i != NumWords; ++i) { uint16_t Part = (Val >> (i * 16)) & 0xFFFF; - Part = support::endian::byte_swap(Part, support::big); + Part = support::endian::byte_swap(Part, llvm::endianness::big); NewVal |= (Part << (i * 16)); } return NewVal; diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp index 7fc5395671cfc..16460f0a105b8 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp @@ -218,7 +218,7 @@ void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI, for (uint64_t Word : Data) { for (int i = 0; i < 4 && InstSize > 0; ++i, InstSize -= 16) { support::endian::write(CB, static_cast(Word), - support::big); + llvm::endianness::big); Word >>= 16; } } diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp index a667f457bd03f..bd9f6279445af 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp @@ -34,7 +34,7 @@ class MSP430AsmBackend : public MCAsmBackend { public: MSP430AsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI) - : MCAsmBackend(support::little), OSABI(OSABI) {} + : MCAsmBackend(llvm::endianness::little), OSABI(OSABI) {} ~MSP430AsmBackend() override = default; void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp index 985906a353313..51428552d8af0 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp @@ -94,7 +94,8 @@ void MSP430MCCodeEmitter::encodeInstruction(const MCInst &MI, size_t WordCount = Size / 2; while (WordCount--) { - 
support::endian::write(CB, (uint16_t)BinaryOpCode, support::little); + support::endian::write(CB, (uint16_t)BinaryOpCode, + llvm::endianness::little); BinaryOpCode >>= 16; } } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 71f333d0d0c38..7eca49e709a0c 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -281,7 +281,7 @@ void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, bool microMipsLEByteOrder = needsMMLEByteOrder((unsigned) Kind); for (unsigned i = 0; i != NumBytes; ++i) { - unsigned Idx = Endian == support::little + unsigned Idx = Endian == llvm::endianness::little ? (microMipsLEByteOrder ? calculateMMLEIndex(i) : i) : (FullSize - 1 - i); CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8); @@ -293,7 +293,7 @@ void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, // Write out the fixed up bytes back to the code/data bits. for (unsigned i = 0; i != NumBytes; ++i) { - unsigned Idx = Endian == support::little + unsigned Idx = Endian == llvm::endianness::little ? (microMipsLEByteOrder ? 
calculateMMLEIndex(i) : i) : (FullSize - 1 - i); Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff); @@ -519,7 +519,7 @@ getFixupKindInfo(MCFixupKind Kind) const { assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); - if (Endian == support::little) + if (Endian == llvm::endianness::little) return LittleEndianInfos[Kind - FirstTargetFixupKind]; return BigEndianInfos[Kind - FirstTargetFixupKind]; } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 749223a6d01b3..228a0b4c407c5 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -32,7 +32,8 @@ class MipsAsmBackend : public MCAsmBackend { public: MipsAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU, bool N32) - : MCAsmBackend(TT.isLittleEndian() ? support::little : support::big), + : MCAsmBackend(TT.isLittleEndian() ? llvm::endianness::little + : llvm::endianness::big), TheTriple(TT), IsN32(N32) {} std::unique_ptr diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 6a81a842be7bf..73ee44eec22cd 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -206,7 +206,8 @@ void MipsMCCodeEmitter::encodeInstruction(const MCInst &MI, if (!Size) llvm_unreachable("Desc.getSize() returns 0"); - auto Endian = IsLittleEndian ? support::little : support::big; + auto Endian = + IsLittleEndian ? 
llvm::endianness::little : llvm::endianness::big; if (Size == 2) { support::endian::write(CB, Binary, Endian); } else if (IsLittleEndian && isMicroMips(STI)) { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 89d04dbe378e6..8bd27571a750a 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -87,7 +87,8 @@ class PPCAsmBackend : public MCAsmBackend { Triple TT; public: PPCAsmBackend(const Target &T, const Triple &TT) - : MCAsmBackend(TT.isLittleEndian() ? support::little : support::big), + : MCAsmBackend(TT.isLittleEndian() ? llvm::endianness::little + : llvm::endianness::big), TT(TT) {} unsigned getNumFixupKinds() const override { @@ -132,7 +133,7 @@ class PPCAsmBackend : public MCAsmBackend { assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); - return (Endian == support::little + return (Endian == llvm::endianness::little ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; } @@ -154,7 +155,8 @@ class PPCAsmBackend : public MCAsmBackend { // from the fixup value. The Value has been "split up" into the appropriate // bitfields above. for (unsigned i = 0; i != NumBytes; ++i) { - unsigned Idx = Endian == support::little ? i : (NumBytes - 1 - i); + unsigned Idx = + Endian == llvm::endianness::little ? i : (NumBytes - 1 - i); Data[Offset + i] |= uint8_t((Value >> (Idx * 8)) & 0xff); } } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index e06b4cdd4e4d5..910b5892d0331 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -508,7 +508,8 @@ void PPCMCCodeEmitter::encodeInstruction(const MCInst &MI, // Output the constant in big/little endian byte order. 
unsigned Size = getInstSizeInBytes(MI); - llvm::endianness E = IsLittleEndian ? support::little : support::big; + llvm::endianness E = + IsLittleEndian ? llvm::endianness::little : llvm::endianness::big; switch (Size) { case 0: break; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index ca5aeb943c3be..765d44c4575b1 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -238,7 +238,7 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF, OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc); Offset = OS.tell(); Fixup = RISCV::getRelocPairForSize(2); - support::endian::write(OS, 0, support::little); + support::endian::write(OS, 0, llvm::endianness::little); } const MCBinaryExpr &MBE = cast(AddrDelta); @@ -303,15 +303,15 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF, AddFixups(0, {ELF::R_RISCV_SET6, ELF::R_RISCV_SUB6}); } else if (isUInt<8>(Value)) { OS << uint8_t(dwarf::DW_CFA_advance_loc1); - support::endian::write(OS, 0, support::little); + support::endian::write(OS, 0, llvm::endianness::little); AddFixups(1, {ELF::R_RISCV_SET8, ELF::R_RISCV_SUB8}); } else if (isUInt<16>(Value)) { OS << uint8_t(dwarf::DW_CFA_advance_loc2); - support::endian::write(OS, 0, support::little); + support::endian::write(OS, 0, llvm::endianness::little); AddFixups(1, {ELF::R_RISCV_SET16, ELF::R_RISCV_SUB16}); } else if (isUInt<32>(Value)) { OS << uint8_t(dwarf::DW_CFA_advance_loc4); - support::endian::write(OS, 0, support::little); + support::endian::write(OS, 0, llvm::endianness::little); AddFixups(1, {ELF::R_RISCV_SET32, ELF::R_RISCV_SUB32}); } else { llvm_unreachable("unsupported CFA encoding"); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index 0ea1f32e82963..95596ad5944c8 100644 --- 
a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -31,8 +31,8 @@ class RISCVAsmBackend : public MCAsmBackend { public: RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, const MCTargetOptions &Options) - : MCAsmBackend(support::little, RISCV::fixup_riscv_relax), STI(STI), - OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) { + : MCAsmBackend(llvm::endianness::little, RISCV::fixup_riscv_relax), + STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) { RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits()); } ~RISCVAsmBackend() override = default; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 716c3ac14d116..c5f2d92e9e47c 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -137,7 +137,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, // Emit AUIPC Ra, Func with R_RISCV_CALL relocation type. TmpInst = MCInstBuilder(RISCV::AUIPC).addReg(Ra).addExpr(CallExpr); Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); if (MI.getOpcode() == RISCV::PseudoTAIL || MI.getOpcode() == RISCV::PseudoJump) @@ -147,7 +147,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, // Emit JALR Ra, Ra, 0 TmpInst = MCInstBuilder(RISCV::JALR).addReg(Ra).addReg(Ra).addImm(0); Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); } // Expand PseudoAddTPRel to a simple ADD with the correct relocation. 
@@ -186,7 +186,7 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, .addOperand(SrcReg) .addOperand(TPReg); uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); } static unsigned getInvertedBranchOp(unsigned BrOp) { @@ -240,14 +240,14 @@ void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI, Opcode == RISCV::PseudoLongBNE ? RISCV::C_BEQZ : RISCV::C_BNEZ; MCInst TmpInst = MCInstBuilder(InvOpc).addReg(SrcReg1).addImm(6); uint16_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); Offset = 2; } else { unsigned InvOpc = getInvertedBranchOp(Opcode); MCInst TmpInst = MCInstBuilder(InvOpc).addReg(SrcReg1).addReg(SrcReg2).addImm(8); uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); Offset = 4; } @@ -255,7 +255,7 @@ void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI, MCInst TmpInst = MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addOperand(SrcSymbol); uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); Fixups.clear(); if (SrcSymbol.isExpr()) { @@ -306,12 +306,12 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, llvm_unreachable("Unhandled encodeInstruction length!"); case 2: { uint16_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); - support::endian::write(CB, Bits, support::little); + support::endian::write(CB, Bits, llvm::endianness::little); break; } case 4: { uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); - support::endian::write(CB, Bits, support::little); + support::endian::write(CB, Bits, llvm::endianness::little); 
break; } } diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp index 016f3c4bf9220..1b80e4b9277bd 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp @@ -59,5 +59,5 @@ MCAsmBackend *llvm::createSPIRVAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &) { - return new SPIRVAsmBackend(support::little); + return new SPIRVAsmBackend(llvm::endianness::little); } diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp index 60b68dea934ad..8aea26d9963ce 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp @@ -78,10 +78,10 @@ static void emitOperand(const MCOperand &Op, SmallVectorImpl &CB) { if (Op.isReg()) { // Emit the id index starting at 1 (0 is an invalid index). support::endian::write( - CB, Register::virtReg2Index(Op.getReg()) + 1, support::little); + CB, Register::virtReg2Index(Op.getReg()) + 1, llvm::endianness::little); } else if (Op.isImm()) { support::endian::write(CB, static_cast(Op.getImm()), - support::little); + llvm::endianness::little); } else { llvm_unreachable("Unexpected operand type in VReg"); } @@ -113,7 +113,7 @@ void SPIRVMCCodeEmitter::encodeInstruction(const MCInst &MI, const uint64_t OpCode = getBinaryCodeForInstr(MI, Fixups, STI); const uint32_t NumWords = MI.getNumOperands() + 1; const uint32_t FirstWord = (NumWords << 16) | OpCode; - support::endian::write(CB, FirstWord, support::little); + support::endian::write(CB, FirstWord, llvm::endianness::little); // Emit the instruction arguments (emitting the output type first if present). 
if (hasType(MI, MCII)) diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 2c0696e8048b5..9e14f96b6caa0 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -136,8 +136,9 @@ namespace { public: SparcAsmBackend(const Target &T) - : MCAsmBackend(StringRef(T.getName()) == "sparcel" ? support::little - : support::big), + : MCAsmBackend(StringRef(T.getName()) == "sparcel" + ? llvm::endianness::little + : llvm::endianness::big), TheTarget(T), Is64Bit(StringRef(TheTarget.getName()) == "sparcv9") {} unsigned getNumFixupKinds() const override { @@ -264,7 +265,7 @@ namespace { assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); - if (Endian == support::little) + if (Endian == llvm::endianness::little) return InfosLE[Kind - FirstTargetFixupKind]; return InfosBE[Kind - FirstTargetFixupKind]; @@ -355,7 +356,8 @@ namespace { // from the fixup value. The Value has been "split up" into the // appropriate bitfields above. for (unsigned i = 0; i != NumBytes; ++i) { - unsigned Idx = Endian == support::little ? i : (NumBytes - 1) - i; + unsigned Idx = + Endian == llvm::endianness::little ? i : (NumBytes - 1) - i; Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff); } } diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index 9b3282e0736c4..42357e3b1aa92 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -93,8 +93,9 @@ void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, const MCSubtargetInfo &STI) const { unsigned Bits = getBinaryCodeForInstr(MI, Fixups, STI); support::endian::write(CB, Bits, - Ctx.getAsmInfo()->isLittleEndian() ? support::little - : support::big); + Ctx.getAsmInfo()->isLittleEndian() + ? 
llvm::endianness::little + : llvm::endianness::big); // Some instructions have phantom operands that only contribute a fixup entry. unsigned SymOpNo = 0; diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 20dcf74cb8d92..eafe41124897d 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -107,8 +107,7 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value, namespace { class SystemZMCAsmBackend : public MCAsmBackend { public: - SystemZMCAsmBackend() - : MCAsmBackend(support::big) {} + SystemZMCAsmBackend() : MCAsmBackend(llvm::endianness::big) {} // Override MCAsmBackend unsigned getNumFixupKinds() const override { diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp index 38d163b370801..2773a7aabab74 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp @@ -97,7 +97,8 @@ class VEAsmBackend : public MCAsmBackend { const Target &TheTarget; public: - VEAsmBackend(const Target &T) : MCAsmBackend(support::little), TheTarget(T) {} + VEAsmBackend(const Target &T) + : MCAsmBackend(llvm::endianness::little), TheTarget(T) {} unsigned getNumFixupKinds() const override { return VE::NumTargetFixupKinds; } @@ -174,7 +175,7 @@ class VEAsmBackend : public MCAsmBackend { for (uint64_t i = 0; i < Count; i += 8) support::endian::write(OS, 0x7900000000000000ULL, - support::little); + llvm::endianness::little); return true; } @@ -207,7 +208,8 @@ class ELFVEAsmBackend : public VEAsmBackend { // from the fixup value. The Value has been "split up" into the // appropriate bitfields above. for (unsigned i = 0; i != NumBytes; ++i) { - unsigned Idx = Endian == support::little ? i : (NumBytes - 1) - i; + unsigned Idx = + Endian == llvm::endianness::little ? 
i : (NumBytes - 1) - i; Data[Offset + Idx] |= static_cast((Value >> (i * 8)) & 0xff); } } diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp index bb643d23e6183..31a07fab042d0 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp @@ -82,7 +82,7 @@ void VEMCCodeEmitter::encodeInstruction(const MCInst &MI, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); - support::endian::write(CB, Bits, support::little); + support::endian::write(CB, Bits, llvm::endianness::little); ++MCNumEmitted; // Keep track of the # of mi's emitted. } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 85bb52c03e80f..ffab67f8ab2b2 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -35,7 +35,7 @@ class WebAssemblyAsmBackend final : public MCAsmBackend { public: explicit WebAssemblyAsmBackend(bool Is64Bit, bool IsEmscripten) - : MCAsmBackend(support::little), Is64Bit(Is64Bit), + : MCAsmBackend(llvm::endianness::little), Is64Bit(Is64Bit), IsEmscripten(IsEmscripten) {} unsigned getNumFixupKinds() const override { diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index 634ed10d4df50..aaca213c4afe9 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -112,16 +112,20 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( break; case WebAssembly::OPERAND_SIGNATURE: case WebAssembly::OPERAND_VEC_I8IMM: - support::endian::write(OS, MO.getImm(), support::little); + support::endian::write(OS, 
MO.getImm(), + llvm::endianness::little); break; case WebAssembly::OPERAND_VEC_I16IMM: - support::endian::write(OS, MO.getImm(), support::little); + support::endian::write(OS, MO.getImm(), + llvm::endianness::little); break; case WebAssembly::OPERAND_VEC_I32IMM: - support::endian::write(OS, MO.getImm(), support::little); + support::endian::write(OS, MO.getImm(), + llvm::endianness::little); break; case WebAssembly::OPERAND_VEC_I64IMM: - support::endian::write(OS, MO.getImm(), support::little); + support::endian::write(OS, MO.getImm(), + llvm::endianness::little); break; case WebAssembly::OPERAND_GLOBAL: Ctx.reportError( @@ -137,10 +141,10 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( } else if (MO.isSFPImm()) { uint32_t F = MO.getSFPImm(); - support::endian::write(OS, F, support::little); + support::endian::write(OS, F, llvm::endianness::little); } else if (MO.isDFPImm()) { uint64_t D = MO.getDFPImm(); - support::endian::write(OS, D, support::little); + support::endian::write(OS, D, llvm::endianness::little); } else if (MO.isExpr()) { const MCOperandInfo &Info = Desc.operands()[I]; llvm::MCFixupKind FixupKind; diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 49651da63ecf9..967c7574355db 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -194,7 +194,7 @@ template static bool consume(InternalInstruction *insn, T &ptr) { uint64_t offset = insn->readerCursor - insn->startLocation; if (offset + sizeof(T) > r.size()) return true; - ptr = support::endian::read(&r[offset], support::little); + ptr = support::endian::read(&r[offset], llvm::endianness::little); insn->readerCursor += sizeof(T); return false; } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index fbb2fc138d7cc..e01ce4f43143b 100644 --- 
a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -137,7 +137,7 @@ class X86AsmBackend : public MCAsmBackend { public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) - : MCAsmBackend(support::little), STI(STI), + : MCAsmBackend(llvm::endianness::little), STI(STI), MCII(T.createMCInstrInfo()) { if (X86AlignBranchWithin32BBoundaries) { // At the moment, this defaults to aligning fused branches, unconditional diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp index 61417a2f24559..db4484bb57c1a 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp @@ -30,7 +30,8 @@ class XtensaMCAsmBackend : public MCAsmBackend { public: XtensaMCAsmBackend(uint8_t osABI, bool isLE) - : MCAsmBackend(support::little), OSABI(osABI), IsLittleEndian(isLE) {} + : MCAsmBackend(llvm::endianness::little), OSABI(osABI), + IsLittleEndian(isLE) {} unsigned getNumFixupKinds() const override { return Xtensa::NumTargetFixupKinds; diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp index 3fee0e5d52599..423c45e22bf8c 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp @@ -87,7 +87,8 @@ static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize, continue; } DataPtr += sizeof(Header); - uint64_t Count = llvm::support::endian::read64(DataPtr, support::native); + uint64_t Count = + llvm::support::endian::read64(DataPtr, llvm::endianness::native); DataPtr += sizeof(Count); struct perf_branch_entry Entry; diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index c2de9edf07fe3..46ec4bdc28709 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ 
-370,8 +370,9 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, return Err; // Save the GSYM file to disk. - llvm::endianness Endian = - Obj.makeTriple().isLittleEndian() ? support::little : support::big; + llvm::endianness Endian = Obj.makeTriple().isLittleEndian() + ? llvm::endianness::little + : llvm::endianness::big; std::optional OptSegmentSize; if (SegmentSize > 0) diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 0709f292a492c..d73d247599b9e 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -1903,7 +1903,8 @@ static Error runChecks(Session &S, Triple TT, SubtargetFeatures Features) { RuntimeDyldChecker Checker( IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo, GetGOTInfo, - S.ES.getTargetTriple().isLittleEndian() ? support::little : support::big, + S.ES.getTargetTriple().isLittleEndian() ? llvm::endianness::little + : llvm::endianness::big, TT, StringRef(), Features, dbgs()); std::string CheckLineStart = "# " + CheckName + ":"; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 89d59e2a77ad9..537c18bf3440d 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -685,8 +685,9 @@ class AMDGCNPrettyPrinter : public PrettyPrinter { // using the .long directive, or .byte directive if fewer than 4 bytes // remaining if (Bytes.size() >= 4) { - OS << format("\t.long 0x%08" PRIx32 " ", - support::endian::read32(Bytes.data())); + OS << format( + "\t.long 0x%08" PRIx32 " ", + support::endian::read32(Bytes.data())); OS.indent(42); } else { OS << format("\t.byte 0x%02" PRIx8, Bytes[0]); @@ -1168,7 +1169,7 @@ static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index, ArrayRef MappingSymbols, const MCSubtargetInfo &STI, raw_ostream &OS) { llvm::endianness Endian = - Obj.isLittleEndian() ? support::little : support::big; + Obj.isLittleEndian() ? 
llvm::endianness::little : llvm::endianness::big; size_t Start = OS.tell(); OS << format("%8" PRIx64 ": ", SectionAddr + Index); if (Index + 4 <= End) { diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.cpp b/llvm/tools/llvm-rc/ResourceFileWriter.cpp index dd9db338ece25..9738fd49343a6 100644 --- a/llvm/tools/llvm-rc/ResourceFileWriter.cpp +++ b/llvm/tools/llvm-rc/ResourceFileWriter.cpp @@ -891,7 +891,7 @@ Error ResourceFileWriter::visitIconOrCursorResource(const RCResource *Base) { if (!File) return File.takeError(); - BinaryStreamReader Reader((*File)->getBuffer(), support::little); + BinaryStreamReader Reader((*File)->getBuffer(), llvm::endianness::little); // Read the file headers. // - At the beginning, ICONDIR/NEWHEADER header. diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.h b/llvm/tools/llvm-rc/ResourceFileWriter.h index d809890ee8e82..9413a0eecdace 100644 --- a/llvm/tools/llvm-rc/ResourceFileWriter.h +++ b/llvm/tools/llvm-rc/ResourceFileWriter.h @@ -183,8 +183,8 @@ class ResourceFileWriter : public Visitor { uint64_t writeObject(const ArrayRef Data); template uint64_t writeInt(const T &Value) { - support::detail::packed_endian_specific_integral + support::detail::packed_endian_specific_integral< + T, llvm::endianness::little, support::unaligned> Object(Value); return writeObject(Object); } diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index bdfb8acec069e..9c24b0b8db35f 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -1106,7 +1106,7 @@ void COFFDumper::initializeFileAndStringTables(BinaryStreamReader &Reader) { if (Error E = Reader.readFixedString(Contents, SubSectionSize)) reportError(std::move(E), Obj->getFileName()); - BinaryStreamRef ST(Contents, support::little); + BinaryStreamRef ST(Contents, llvm::endianness::little); switch (DebugSubsectionKind(SubType)) { case DebugSubsectionKind::FileChecksums: if (Error E = CVFileChecksumTable.initialize(ST)) @@ 
-1148,7 +1148,7 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, reportError(errorCodeToError(object_error::parse_failed), Obj->getFileName()); - BinaryStreamReader FSReader(Data, support::little); + BinaryStreamReader FSReader(Data, llvm::endianness::little); initializeFileAndStringTables(FSReader); // TODO: Convert this over to using ModuleSubstreamVisitor. @@ -1302,7 +1302,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, ListScope S(W, "FunctionLineTable"); W.printString("LinkageName", Name); - BinaryStreamReader Reader(FunctionLineTables[Name], support::little); + BinaryStreamReader Reader(FunctionLineTables[Name], + llvm::endianness::little); DebugLinesSubsectionRef LineInfo; if (Error E = LineInfo.initialize(Reader)) @@ -2091,10 +2092,10 @@ void COFFDumper::printStackMap() const { if (Obj->isLittleEndian()) prettyPrintStackMap( - W, StackMapParser(StackMapContentsArray)); + W, StackMapParser(StackMapContentsArray)); else prettyPrintStackMap( - W, StackMapParser(StackMapContentsArray)); + W, StackMapParser(StackMapContentsArray)); } void COFFDumper::printAddrsig() { diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 93b645cdfc1a8..586119a10b4f3 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -2842,7 +2842,7 @@ template void ELFDumper::printArchSpecificInfo() { if (Obj.isLE()) printAttributes(ELF::SHT_ARM_ATTRIBUTES, std::make_unique(&W), - support::little); + llvm::endianness::little); else reportUniqueWarning("attribute printing not implemented for big-endian " "ARM objects"); @@ -2851,7 +2851,7 @@ template void ELFDumper::printArchSpecificInfo() { if (Obj.isLE()) printAttributes(ELF::SHT_RISCV_ATTRIBUTES, std::make_unique(&W), - support::little); + llvm::endianness::little); else reportUniqueWarning("attribute printing not implemented for big-endian " "RISC-V objects"); @@ -2859,7 +2859,7 @@ template void 
ELFDumper::printArchSpecificInfo() { case EM_MSP430: printAttributes(ELF::SHT_MSP430_ATTRIBUTES, std::make_unique(&W), - support::little); + llvm::endianness::little); break; case EM_MIPS: { printMipsABIFlags(); @@ -6002,9 +6002,9 @@ template void GNUELFDumper::printNotes() { return Error::success(); } else if (Name == "CORE") { if (Type == ELF::NT_FILE) { - DataExtractor DescExtractor(Descriptor, - ELFT::TargetEndianness == support::little, - sizeof(Elf_Addr)); + DataExtractor DescExtractor( + Descriptor, ELFT::TargetEndianness == llvm::endianness::little, + sizeof(Elf_Addr)); if (Expected NoteOrErr = readCoreNote(DescExtractor)) { printCoreNote(OS, *NoteOrErr); return Error::success(); @@ -7699,9 +7699,9 @@ template void LLVMELFDumper::printNotes() { return Error::success(); } else if (Name == "CORE") { if (Type == ELF::NT_FILE) { - DataExtractor DescExtractor(Descriptor, - ELFT::TargetEndianness == support::little, - sizeof(Elf_Addr)); + DataExtractor DescExtractor( + Descriptor, ELFT::TargetEndianness == llvm::endianness::little, + sizeof(Elf_Addr)); if (Expected N = readCoreNote(DescExtractor)) { printCoreNoteLLVMStyle(*N, W); return Error::success(); diff --git a/llvm/tools/llvm-readobj/MachODumper.cpp b/llvm/tools/llvm-readobj/MachODumper.cpp index fb5a58747e50d..0a23ad772e4c5 100644 --- a/llvm/tools/llvm-readobj/MachODumper.cpp +++ b/llvm/tools/llvm-readobj/MachODumper.cpp @@ -734,10 +734,10 @@ void MachODumper::printStackMap() const { if (Obj->isLittleEndian()) prettyPrintStackMap( - W, StackMapParser(StackMapContentsArray)); + W, StackMapParser(StackMapContentsArray)); else prettyPrintStackMap( - W, StackMapParser(StackMapContentsArray)); + W, StackMapParser(StackMapContentsArray)); } void MachODumper::printCGProfile() { diff --git a/llvm/tools/llvm-readobj/WindowsResourceDumper.cpp b/llvm/tools/llvm-readobj/WindowsResourceDumper.cpp index fb085ecaa76ef..53370054f5e91 100644 --- a/llvm/tools/llvm-readobj/WindowsResourceDumper.cpp +++ 
b/llvm/tools/llvm-readobj/WindowsResourceDumper.cpp @@ -26,7 +26,7 @@ std::string stripUTF16(const ArrayRef &UTF16Str) { for (UTF16 Ch : UTF16Str) { // UTF16Str will have swapped byte order in case of big-endian machines. // Swap it back in such a case. - uint16_t ChValue = support::endian::byte_swap(Ch, support::little); + uint16_t ChValue = support::endian::byte_swap(Ch, llvm::endianness::little); if (ChValue <= 0xFF) Result += ChValue; else diff --git a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp index d947be7f1fd87..107b555a99faa 100644 --- a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp +++ b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp @@ -978,7 +978,9 @@ static int linkAndVerify() { if (!Checker) Checker = std::make_unique( IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo, - GetStubInfo, Obj.isLittleEndian() ? support::little : support::big, + GetStubInfo, + Obj.isLittleEndian() ? llvm::endianness::little + : llvm::endianness::big, TheTriple, MCPU, SubtargetFeatures(), dbgs()); auto FileName = sys::path::filename(InputFile); diff --git a/llvm/tools/obj2yaml/coff2yaml.cpp b/llvm/tools/obj2yaml/coff2yaml.cpp index 604799fb2737f..2f80e62f87727 100644 --- a/llvm/tools/obj2yaml/coff2yaml.cpp +++ b/llvm/tools/obj2yaml/coff2yaml.cpp @@ -123,7 +123,7 @@ initializeFileAndStringTable(const llvm::object::COFFObjectFile &Obj, cantFail(Obj.getSectionContents(COFFSection, sectionData)); - BinaryStreamReader Reader(sectionData, support::little); + BinaryStreamReader Reader(sectionData, llvm::endianness::little); uint32_t Magic; Err(Reader.readInteger(Magic)); diff --git a/llvm/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp b/llvm/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp index 3eda776fdf6a3..5c961998a4157 100644 --- a/llvm/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp +++ b/llvm/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp @@ -182,7 +182,7 @@ class RandomAccessVisitorTest : public testing::Test 
{ uint32_t Size = Count * sizeof(TypeIndexOffset); uint8_t *Buffer = GlobalState->Allocator.Allocate(Size); MutableArrayRef Bytes(Buffer, Size); - Storage = MutableBinaryByteStream(Bytes, support::little); + Storage = MutableBinaryByteStream(Bytes, llvm::endianness::little); BinaryStreamWriter Writer(Storage); for (const auto I : Indices) consumeError(Writer.writeObject(GlobalState->AllOffsets[I])); diff --git a/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp b/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp index 158333b106222..c40ce7adb0b5e 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp @@ -52,7 +52,8 @@ TEST(JITLinkMocks, SmokeTest) { // Check that the testing infrastructure defaults can "link" a graph // successfully. auto G = std::make_unique("foo", Triple("x86_64-apple-darwin"), 8, - support::little, getGenericEdgeKindName); + llvm::endianness::little, + getGenericEdgeKindName); ArrayRef Content = "hello, world!"; auto &Sec = diff --git a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp index 711f35fc7683c..a94ad0859ebba 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp @@ -61,12 +61,12 @@ static ArrayRef BlockContent(BlockContentBytes); TEST(LinkGraphTest, Construction) { // Check that LinkGraph construction works as expected. 
- LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); EXPECT_EQ(G.getName(), "foo"); EXPECT_EQ(G.getTargetTriple().str(), "x86_64-apple-darwin"); EXPECT_EQ(G.getPointerSize(), 8U); - EXPECT_EQ(G.getEndianness(), support::little); + EXPECT_EQ(G.getEndianness(), llvm::endianness::little); EXPECT_TRUE(G.external_symbols().empty()); EXPECT_TRUE(G.absolute_symbols().empty()); EXPECT_TRUE(G.defined_symbols().empty()); @@ -75,7 +75,7 @@ TEST(LinkGraphTest, Construction) { TEST(LinkGraphTest, AddressAccess) { // Check that we can get addresses for blocks, symbols, and edges. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec1 = @@ -94,7 +94,7 @@ TEST(LinkGraphTest, AddressAccess) { TEST(LinkGraphTest, SectionEmpty) { // Check that Section::empty behaves as expected. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", orc::MemProt::Read | orc::MemProt::Write); @@ -112,7 +112,7 @@ TEST(LinkGraphTest, SectionEmpty) { TEST(LinkGraphTest, BlockAndSymbolIteration) { // Check that we can iterate over blocks within Sections and across sections. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", orc::MemProt::Read | orc::MemProt::Write); @@ -165,7 +165,7 @@ TEST(LinkGraphTest, BlockAndSymbolIteration) { TEST(LinkGraphTest, ContentAccessAndUpdate) { // Check that we can make a defined symbol external. 
- LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec = G.createSection("__data", orc::MemProt::Read | orc::MemProt::Write); @@ -254,7 +254,7 @@ TEST(LinkGraphTest, ContentAccessAndUpdate) { TEST(LinkGraphTest, MakeExternal) { // Check that we can make defined and absolute symbols external. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec = G.createSection("__data", orc::MemProt::Read | orc::MemProt::Write); @@ -324,7 +324,7 @@ TEST(LinkGraphTest, MakeExternal) { TEST(LinkGraphTest, MakeAbsolute) { // Check that we can make defined and external symbols absolute. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec = G.createSection("__data", orc::MemProt::Read | orc::MemProt::Write); @@ -393,7 +393,7 @@ TEST(LinkGraphTest, MakeAbsolute) { TEST(LinkGraphTest, MakeDefined) { // Check that we can make an external symbol defined. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec = G.createSection("__data", orc::MemProt::Read | orc::MemProt::Write); @@ -441,7 +441,7 @@ TEST(LinkGraphTest, MakeDefined) { TEST(LinkGraphTest, TransferDefinedSymbol) { // Check that we can transfer a defined symbol from one block to another. 
- LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec = G.createSection("__data", orc::MemProt::Read | orc::MemProt::Write); @@ -476,7 +476,7 @@ TEST(LinkGraphTest, TransferDefinedSymbol) { TEST(LinkGraphTest, TransferDefinedSymbolAcrossSections) { // Check that we can transfer a defined symbol from an existing block in one // section to another. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", orc::MemProt::Read | orc::MemProt::Write); @@ -510,7 +510,7 @@ TEST(LinkGraphTest, TransferDefinedSymbolAcrossSections) { TEST(LinkGraphTest, TransferBlock) { // Check that we can transfer a block (and all associated symbols) from one // section to another. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", orc::MemProt::Read | orc::MemProt::Write); @@ -558,7 +558,7 @@ TEST(LinkGraphTest, TransferBlock) { TEST(LinkGraphTest, MergeSections) { // Check that we can transfer a block (and all associated symbols) from one // section to another. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", orc::MemProt::Read | orc::MemProt::Write); @@ -644,7 +644,7 @@ TEST(LinkGraphTest, MergeSections) { TEST(LinkGraphTest, SplitBlock) { // Check that the LinkGraph::splitBlock test works as expected. 
- LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec = G.createSection("__data", orc::MemProt::Read | orc::MemProt::Write); @@ -740,7 +740,7 @@ TEST(LinkGraphTest, SplitBlock) { } TEST(LinkGraphTest, GraphAllocationMethods) { - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); // Test allocation of sized, uninitialized buffer. @@ -761,7 +761,7 @@ TEST(LinkGraphTest, GraphAllocationMethods) { TEST(LinkGraphTest, IsCStringBlockTest) { // Check that the LinkGraph::splitBlock test works as expected. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto &Sec = G.createSection("__data", orc::MemProt::Read | orc::MemProt::Write); @@ -786,7 +786,7 @@ TEST(LinkGraphTest, IsCStringBlockTest) { TEST(LinkGraphTest, BasicLayoutHonorsNoAlloc) { // Check that BasicLayout honors NoAlloc. - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); // Create a regular section and block. diff --git a/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp index c6f4b962a002a..f0f3dd117c6f8 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp @@ -19,7 +19,8 @@ using namespace llvm::jitlink; TEST(MemoryManagerErrorTest, ErrorOnFirstAllocate) { // Check that we can get addresses for blocks, symbols, and edges. 
auto G = std::make_unique("foo", Triple("x86_64-apple-darwin"), 8, - support::little, getGenericEdgeKindName); + llvm::endianness::little, + getGenericEdgeKindName); ArrayRef Content = "hello, world!"; auto &Sec = diff --git a/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp index fb932e756c727..e33aa63b5e4c8 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp @@ -60,7 +60,7 @@ GenerateStub(LinkGraph &G, size_t PointerSize, Edge::Kind PointerEdgeKind) { TEST(StubsTest, StubsGeneration_x86_64) { const char PointerJumpStubContent[6] = { static_cast(0xFFu), 0x25, 0x00, 0x00, 0x00, 0x00}; - LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, getGenericEdgeKindName); auto [PointerSym, StubSym] = GenerateStub(G, 8U, x86_64::Pointer64); @@ -80,7 +80,7 @@ TEST(StubsTest, StubsGeneration_aarch64) { 0x10, 0x02, 0x40, (char)0xf9u, // LDR x16, [x16, @pageoff12] 0x00, 0x02, 0x1f, (char)0xd6u // BR x16 }; - LinkGraph G("foo", Triple("aarch64-linux-gnu"), 8, support::little, + LinkGraph G("foo", Triple("aarch64-linux-gnu"), 8, llvm::endianness::little, getGenericEdgeKindName); auto [PointerSym, StubSym] = GenerateStub(G, 8U, aarch64::Pointer64); @@ -100,8 +100,8 @@ TEST(StubsTest, StubsGeneration_aarch64) { TEST(StubsTest, StubsGeneration_i386) { const char PointerJumpStubContent[6] = { static_cast(0xFFu), 0x25, 0x00, 0x00, 0x00, 0x00}; - LinkGraph G("foo", Triple("i386-unknown-linux-gnu"), 4, support::little, - getGenericEdgeKindName); + LinkGraph G("foo", Triple("i386-unknown-linux-gnu"), 4, + llvm::endianness::little, getGenericEdgeKindName); auto [PointerSym, StubSym] = GenerateStub(G, 4U, i386::Pointer32); EXPECT_EQ(std::distance(StubSym.getBlock().edges().begin(), @@ -129,7 +129,7 @@ TEST(StubsTest, StubsGeneration_loongarch32) { 0x00, 0x4c // 
jr $t8 }; - LinkGraph G("foo", Triple("loongarch32"), 4, support::little, + LinkGraph G("foo", Triple("loongarch32"), 4, llvm::endianness::little, getGenericEdgeKindName); auto [PointerSym, StubSym] = GenerateStub(G, 4U, loongarch::Pointer32); @@ -161,7 +161,7 @@ TEST(StubsTest, StubsGeneration_loongarch64) { 0x00, 0x4c // jr $t8 }; - LinkGraph G("foo", Triple("loongarch64"), 8, support::little, + LinkGraph G("foo", Triple("loongarch64"), 8, llvm::endianness::little, getGenericEdgeKindName); auto [PointerSym, StubSym] = GenerateStub(G, 8U, loongarch::Pointer64); diff --git a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp index c943eaf3bd7f1..91659240c9d6d 100644 --- a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp @@ -39,9 +39,9 @@ class ObjectLinkingLayerTest : public testing::Test { }; TEST_F(ObjectLinkingLayerTest, AddLinkGraph) { - auto G = - std::make_unique("foo", Triple("x86_64-apple-darwin"), 8, - support::little, x86_64::getEdgeKindName); + auto G = std::make_unique("foo", Triple("x86_64-apple-darwin"), 8, + llvm::endianness::little, + x86_64::getEdgeKindName); auto &Sec1 = G->createSection("__data", MemProt::Read | MemProt::Write); auto &B1 = G->createContentBlock(Sec1, BlockContent, @@ -104,9 +104,9 @@ TEST_F(ObjectLinkingLayerTest, ClaimLateDefinedWeakSymbols) { ObjLinkingLayer.addPlugin(std::make_unique()); - auto G = - std::make_unique("foo", Triple("x86_64-apple-darwin"), 8, - support::little, x86_64::getEdgeKindName); + auto G = std::make_unique("foo", Triple("x86_64-apple-darwin"), 8, + llvm::endianness::little, + x86_64::getEdgeKindName); auto &DataSec = G->createSection("__data", MemProt::Read | MemProt::Write); auto &DataBlock = G->createContentBlock(DataSec, BlockContent, @@ -158,9 +158,9 @@ TEST_F(ObjectLinkingLayerTest, HandleErrorDuringPostAllocationPass) { 
ObjLinkingLayer.addPlugin(std::make_unique()); - auto G = - std::make_unique("foo", Triple("x86_64-apple-darwin"), 8, - support::little, x86_64::getEdgeKindName); + auto G = std::make_unique("foo", Triple("x86_64-apple-darwin"), 8, + llvm::endianness::little, + x86_64::getEdgeKindName); auto &DataSec = G->createSection("__data", MemProt::Read | MemProt::Write); auto &DataBlock = G->createContentBlock(DataSec, BlockContent, diff --git a/llvm/unittests/MC/StringTableBuilderTest.cpp b/llvm/unittests/MC/StringTableBuilderTest.cpp index 91f2b3b97e116..05f469a229bf9 100644 --- a/llvm/unittests/MC/StringTableBuilderTest.cpp +++ b/llvm/unittests/MC/StringTableBuilderTest.cpp @@ -58,8 +58,8 @@ TEST(StringTableBuilderTest, BasicWinCOFF) { std::string Expected; - ExpectedSize = - support::endian::byte_swap(ExpectedSize); + ExpectedSize = support::endian::byte_swap( + ExpectedSize); Expected.append((const char*)&ExpectedSize, 4); Expected += "pygmy hippopotamus"; Expected += '\x00'; diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 9c6d15552174c..494e3c18c81c3 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -844,13 +844,13 @@ TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write_big_endian) { Writer.addRecord({"callee3", 0x1235, {3, 4}}, Err); // Set big endian output. - Writer.setValueProfDataEndianness(support::big); + Writer.setValueProfDataEndianness(llvm::endianness::big); auto Profile = Writer.writeBuffer(); readProfile(std::move(Profile)); // Set big endian input. 
- Reader->setValueProfDataEndianness(support::big); + Reader->setValueProfDataEndianness(llvm::endianness::big); Expected R = Reader->getInstrProfRecord("caller", 0x1234); EXPECT_THAT_ERROR(R.takeError(), Succeeded()); @@ -867,7 +867,7 @@ TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write_big_endian) { ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1")); // Restore little endian default: - Writer.setValueProfDataEndianness(support::little); + Writer.setValueProfDataEndianness(llvm::endianness::little); } TEST_P(MaybeSparseInstrProfTest, get_icall_data_merge1) { diff --git a/llvm/unittests/Support/ARMAttributeParser.cpp b/llvm/unittests/Support/ARMAttributeParser.cpp index ef3da3fc7504b..4bde0eeb95030 100644 --- a/llvm/unittests/Support/ARMAttributeParser.cpp +++ b/llvm/unittests/Support/ARMAttributeParser.cpp @@ -35,7 +35,7 @@ bool testBuildAttr(unsigned Tag, unsigned Value, reinterpret_cast(OS.str().c_str()), OS.str().size()); ARMAttributeParser Parser; - cantFail(Parser.parse(Bytes, support::little)); + cantFail(Parser.parse(Bytes, llvm::endianness::little)); std::optional Attr = Parser.getAttributeValue(ExpectedTag); return Attr && *Attr == ExpectedValue; @@ -43,7 +43,7 @@ bool testBuildAttr(unsigned Tag, unsigned Value, void testParseError(ArrayRef bytes, const char *msg) { ARMAttributeParser parser; - Error e = parser.parse(bytes, support::little); + Error e = parser.parse(bytes, llvm::endianness::little); EXPECT_STREQ(toString(std::move(e)).c_str(), msg); } diff --git a/llvm/unittests/Support/BinaryStreamTest.cpp b/llvm/unittests/Support/BinaryStreamTest.cpp index 6ceadd6ce1698..037aa596e7bba 100644 --- a/llvm/unittests/Support/BinaryStreamTest.cpp +++ b/llvm/unittests/Support/BinaryStreamTest.cpp @@ -266,7 +266,7 @@ TEST_F(BinaryStreamTest, StreamRefBounds) { TEST_F(BinaryStreamTest, StreamRefDynamicSize) { StringRef Strings[] = {"1", "2", "3", "4"}; - AppendingBinaryByteStream Stream(support::little); + AppendingBinaryByteStream 
Stream(llvm::endianness::little); BinaryStreamWriter Writer(Stream); BinaryStreamReader Reader(Stream); @@ -320,7 +320,7 @@ TEST_F(BinaryStreamTest, DropOperations) { initializeInput(InputData, 1); ArrayRef Result; - BinaryStreamRef Original(InputData, support::little); + BinaryStreamRef Original(InputData, llvm::endianness::little); ASSERT_EQ(InputData.size(), Original.getLength()); EXPECT_THAT_ERROR(Original.readBytes(0, InputData.size(), Result), @@ -835,7 +835,7 @@ TEST_F(BinaryStreamTest, StreamWriterPadToAlignment) { // This test may seem excessive but it is checking for past bugs and corner // cases by making sure that the stream is allowed to grow and that // both multiple pad chunks and single chunk extensions work. - AppendingBinaryByteStream Stream(support::little); + AppendingBinaryByteStream Stream(llvm::endianness::little); BinaryStreamWriter Writer(Stream); // Offset 0: '0' @@ -874,7 +874,7 @@ TEST_F(BinaryStreamTest, StreamWriterPadToAlignment) { TEST_F(BinaryStreamTest, StreamWriterAppend) { StringRef Strings[] = {"First", "Second", "Third", "Fourth"}; - AppendingBinaryByteStream Stream(support::little); + AppendingBinaryByteStream Stream(llvm::endianness::little); BinaryStreamWriter Writer(Stream); for (auto &Str : Strings) { diff --git a/llvm/unittests/Support/CSKYAttributeParserTest.cpp b/llvm/unittests/Support/CSKYAttributeParserTest.cpp index d3967fb0ea3c3..1d39d14899f85 100644 --- a/llvm/unittests/Support/CSKYAttributeParserTest.cpp +++ b/llvm/unittests/Support/CSKYAttributeParserTest.cpp @@ -81,7 +81,7 @@ static bool testAttributeInt(unsigned Tag, unsigned Value, unsigned ExpectedTag, OS.str().size()); CSKYAttributeParser Parser; - cantFail(Parser.parse(Bytes, support::little)); + cantFail(Parser.parse(Bytes, llvm::endianness::little)); std::optional Attr = Parser.getAttributeValue(ExpectedTag); return Attr && *Attr == ExpectedValue; @@ -98,7 +98,7 @@ static bool testAttributeString(unsigned Tag, const char *Value, OS.str().size()); 
CSKYAttributeParser Parser; - cantFail(Parser.parse(Bytes, support::little)); + cantFail(Parser.parse(Bytes, llvm::endianness::little)); std::optional Attr = Parser.getAttributeString(ExpectedTag); return Attr && *Attr == ExpectedValue; @@ -113,7 +113,7 @@ static void testParseError(unsigned Tag, unsigned Value, const char *msg) { OS.str().size()); CSKYAttributeParser Parser; - Error e = Parser.parse(Bytes, support::little); + Error e = Parser.parse(Bytes, llvm::endianness::little); EXPECT_STREQ(toString(std::move(e)).c_str(), msg); } diff --git a/llvm/unittests/Support/ELFAttributeParserTest.cpp b/llvm/unittests/Support/ELFAttributeParserTest.cpp index 74d031e58a8b0..38e7b09cc3c7d 100644 --- a/llvm/unittests/Support/ELFAttributeParserTest.cpp +++ b/llvm/unittests/Support/ELFAttributeParserTest.cpp @@ -31,7 +31,7 @@ class AttributeHeaderParser : public ELFAttributeParser { static void testParseError(ArrayRef bytes, const char *msg) { AttributeHeaderParser parser; - Error e = parser.parse(bytes, support::little); + Error e = parser.parse(bytes, llvm::endianness::little); EXPECT_STREQ(toString(std::move(e)).c_str(), msg); } diff --git a/llvm/unittests/Support/RISCVAttributeParserTest.cpp b/llvm/unittests/Support/RISCVAttributeParserTest.cpp index cdbec0cf2ddbd..a9ede29c659cf 100644 --- a/llvm/unittests/Support/RISCVAttributeParserTest.cpp +++ b/llvm/unittests/Support/RISCVAttributeParserTest.cpp @@ -42,7 +42,7 @@ static bool testAttribute(unsigned Tag, unsigned Value, unsigned ExpectedTag, OS.str().size()); RISCVAttributeParser Parser; - cantFail(Parser.parse(Bytes, support::little)); + cantFail(Parser.parse(Bytes, llvm::endianness::little)); std::optional Attr = Parser.getAttributeValue(ExpectedTag); return Attr && *Attr == ExpectedValue; From 37a53049765845d3cb9d697d40bd82c9611b73d5 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 12 Oct 2023 22:20:14 -0700 Subject: [PATCH 049/720] [Support] Stop including cstddef (NFC) SwapByteOrder.h doesn't use anything 
from . --- llvm/include/llvm/Support/SwapByteOrder.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Support/SwapByteOrder.h b/llvm/include/llvm/Support/SwapByteOrder.h index 8f26af6f68ac6..18fd41ac5b04e 100644 --- a/llvm/include/llvm/Support/SwapByteOrder.h +++ b/llvm/include/llvm/Support/SwapByteOrder.h @@ -15,7 +15,6 @@ #define LLVM_SUPPORT_SWAPBYTEORDER_H #include "llvm/ADT/bit.h" -#include #include #include From 28b27c1b10ae8d1f5b4fb9df691e8cf0da9be3f6 Mon Sep 17 00:00:00 2001 From: "Balaji V. Iyer" <43187390+bviyer@users.noreply.github.com> Date: Fri, 13 Oct 2023 00:47:36 -0500 Subject: [PATCH 050/720] [ArmSVE][NVVM][Bazel] Added Features to BUILD.bazel file (#68949) Added VectorOps support for ArmSVE in BUILD.bazel Added BasicPtxBuilderInterface support for NVVM in build.bazel --- .../llvm-project-overlay/mlir/BUILD.bazel | 80 +++++++++++++++---- 1 file changed, 63 insertions(+), 17 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 51ea4a28cc8fa..de13e03807e82 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -2097,6 +2097,7 @@ cc_library( ":IR", ":LLVMDialect", ":SideEffectInterfaces", + ":VectorDialect", "//llvm:Core", "//llvm:Support", ], @@ -2109,13 +2110,12 @@ cc_library( includes = ["include"], deps = [ ":ArmSVEDialect", + ":DialectUtils", ":FuncDialect", ":IR", ":LLVMCommonConversion", ":LLVMDialect", - ":TransformUtils", - "//llvm:Core", - "//llvm:Support", + ":VectorDialect", ], ) @@ -4816,6 +4816,7 @@ cc_library( "lib/Dialect/LLVMIR/IR/NVVM*.cpp", "lib/Dialect/LLVMIR/IR/NVVM*.h", "lib/Dialect/LLVMIR/IR/ROCDL*.cpp", + "lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp", "lib/Dialect/LLVMIR/IR/ROCDL*.h", "lib/Dialect/LLVMIR/IR/*X86Vector*.cpp", "lib/Dialect/LLVMIR/IR/*X86Vector*.h", @@ -4827,6 +4828,7 @@ cc_library( "include/mlir/Dialect/LLVMIR/*AMX*.h", 
"include/mlir/Dialect/LLVMIR/*ArmSVE*.h", "include/mlir/Dialect/LLVMIR/NVVM*.h", + "include/mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.h", "include/mlir/Dialect/LLVMIR/ROCDL*.h", "include/mlir/Dialect/LLVMIR/*X86Vector*.h", ], @@ -5768,6 +5770,7 @@ cc_library( hdrs = ["include/mlir/Dialect/LLVMIR/NVVMDialect.h"], includes = ["include"], deps = [ + ":BasicPtxBuilderInterface", ":ConvertToLLVM", ":DialectUtils", ":GPUDialect", @@ -5822,11 +5825,25 @@ cc_library( ], ) +td_library( + name = "BasicPtxBuilderIntTdFiles", + srcs = [ + "include/mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td", + ], + includes = ["include"], + deps = [ + ":GPUOpsTdFiles", + ":LLVMOpsTdFiles", + ":OpBaseTdFiles", + ], +) + td_library( name = "NVVMOpsTdFiles", srcs = ["include/mlir/Dialect/LLVMIR/NVVMOps.td"], includes = ["include"], deps = [ + ":BasicPtxBuilderIntTdFiles", ":GPUOpsTdFiles", ":LLVMOpsTdFiles", ":OpBaseTdFiles", @@ -5834,6 +5851,31 @@ td_library( ], ) +gentbl_cc_library( + name = "BasicPtxBuilderIntGen", + tbl_outs = [ + ( + [ + "-gen-op-interface-decls", + ], + "include/mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.h.inc", + ), + ( + [ + "-gen-op-interface-defs", + ], + "include/mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.cpp.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td", + deps = [ + ":BasicPtxBuilderIntTdFiles", + ":GPUOpsTdFiles", + ":LLVMOpsTdFiles", + ], +) + gentbl_cc_library( name = "NVVMOpsIncGen", tbl_outs = [ @@ -5881,20 +5923,6 @@ gentbl_cc_library( ], "include/mlir/Dialect/LLVMIR/NVVMOpsAttributes.cpp.inc", ), - ( - [ - "-gen-op-interface-decls", - "-attrdefs-dialect=nvvm", - ], - "include/mlir/Dialect/LLVMIR/NVVMOpsInterface.h.inc", - ), - ( - [ - "-gen-op-interface-defs", - "-attrdefs-dialect=nvvm", - ], - "include/mlir/Dialect/LLVMIR/NVVMOpsInterface.cpp.inc", - ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/LLVMIR/NVVMOps.td", @@ -5914,6 +5942,22 @@ gentbl_cc_library( deps = 
[":NVVMOpsTdFiles"], ) +cc_library( + name = "BasicPtxBuilderInterface", + srcs = ["lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp"], + hdrs = [ + "include/mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.h", + ], + includes = ["include"], + deps = [ + ":BasicPtxBuilderIntGen", + ":IR", + ":LLVMDialect", + ":Support", + ], +) + + cc_library( name = "NVVMToLLVM", srcs = glob(["lib/Conversion/NVVMToLLVM/NVVMToLLVM.cpp"]), @@ -7996,6 +8040,7 @@ cc_library( ":LLVMIntrinsicConversionIncGen", ":OpenMPDialect", ":Support", + ":TransformUtils", "//llvm:Core", "//llvm:FrontendOpenMP", "//llvm:Support", @@ -8201,6 +8246,7 @@ cc_library( ":OpenMPCommon", ":Support", ":ToLLVMIRTranslation", + ":TransformUtils", "//llvm:Core", "//llvm:FrontendOpenMP", "//llvm:Support", From 9bd5bfc689a7891b4e0081170834b400308f0ece Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Thu, 12 Oct 2023 22:51:07 -0700 Subject: [PATCH 051/720] [mlir][sparse] remove unused sparse tensor iterator (#68951) --- .../mlir/Dialect/SparseTensor/IR/Enums.h | 3 - .../ExecutionEngine/SparseTensorRuntime.h | 22 +--- .../ExecutionEngine/SparseTensorRuntime.cpp | 109 +----------------- 3 files changed, 7 insertions(+), 127 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index f1643d66c26a1..1434c649acd29 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -146,11 +146,8 @@ enum class Action : uint32_t { kEmptyForward = 1, kFromCOO = 2, kSparseToSparse = 3, - kFuture = 4, // not used kToCOO = 5, - kToIterator = 6, kPack = 7, - // Sort an unordered COO in place. 
kSortCOOInPlace = 8, }; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index f9312c866f363..e8dd50d6730c7 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -39,9 +39,9 @@ extern "C" { /// This is the "swiss army knife" method for materializing sparse /// tensors into the computation. The types of the `ptr` argument and -/// the result depend on the action, as explained in the following table -/// (where "STS" means a sparse-tensor-storage object, "COO" means -/// a coordinate-scheme object, and "Iterator" means an iterator object). +/// the result depend on the action, as explained in the following table, +/// where "STS" means a sparse-tensor-storage object and "COO" means +/// a coordinate-scheme object. /// /// Action: `ptr`: Returns: /// kEmpty - STS, empty @@ -49,8 +49,8 @@ extern "C" { /// kFromCOO COO STS, copied from the COO source /// kSparseToSparse STS STS, copied from the STS source /// kToCOO STS COO, copied from the STS source -/// kToIterator STS Iterator (@getNext/@delSparseTensorIterator) /// kPack buffers STS, from level buffers +/// kSortCOOInPlace STS STS, sorted in place MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -90,14 +90,6 @@ MLIR_SPARSETENSOR_FOREVERY_O(DECL_SPARSECOORDINATES) MLIR_SPARSETENSOR_FOREVERY_V(DECL_FORWARDINGINSERT) #undef DECL_FORWARDINGINSERT -/// Coordinate-scheme method for getting the next element while iterating. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT bool _mlir_ciface_getNext##VNAME( \ - void *iter, StridedMemRefType *cref, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Tensor-storage method to insert elements in lexicographical /// level-coordinate order. 
#define DECL_LEXINSERT(VNAME, V) \ @@ -201,12 +193,6 @@ MLIR_CRUNNERUTILS_EXPORT void delSparseTensor(void *tensor); MLIR_SPARSETENSOR_FOREVERY_V(DECL_DELCOO) #undef DECL_DELCOO -/// Releases the memory for an iterator object. -#define DECL_DELITER(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void delSparseTensorIterator##VNAME(void *iter); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_DELITER) -#undef DECL_DELITER - /// Helper function to read a sparse tensor filename from the environment, /// defined with the naming convention ${TENSOR0}, ${TENSOR1}, etc. MLIR_CRUNNERUTILS_EXPORT char *getTensorFilename(index_type id); diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index cd1b663578a48..ae33a869497a0 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -63,71 +63,18 @@ using namespace mlir::sparse_tensor; //===----------------------------------------------------------------------===// // -// Implementation details for public functions, which don't have a good -// place to live in the C++ library this file is wrapping. +// Utilities for manipulating `StridedMemRefType`. // //===----------------------------------------------------------------------===// namespace { -/// Wrapper class to avoid memory leakage issues. The `SparseTensorCOO` -/// class provides a standard C++ iterator interface, where the iterator -/// is implemented as per `std::vector`'s iterator. However, for MLIR's -/// usage we need to have an iterator which also holds onto the underlying -/// `SparseTensorCOO` so that it can be freed whenever the iterator -/// is freed. 
-// -// We name this `SparseTensorIterator` rather than `SparseTensorCOOIterator` -// for future-proofing, since the use of `SparseTensorCOO` is an -// implementation detail that we eventually want to change (e.g., to -// use `SparseTensorEnumerator` directly, rather than constructing the -// intermediate `SparseTensorCOO` at all). -template -class SparseTensorIterator final { -public: - /// This ctor requires `coo` to be a non-null pointer to a dynamically - /// allocated object, and takes ownership of that object. Therefore, - /// callers must not free the underlying COO object, since the iterator's - /// dtor will do so. - explicit SparseTensorIterator(const SparseTensorCOO *coo) - : coo(coo), it(coo->begin()), end(coo->end()) {} - - ~SparseTensorIterator() { delete coo; } - - // Disable copy-ctor and copy-assignment, to prevent double-free. - SparseTensorIterator(const SparseTensorIterator &) = delete; - SparseTensorIterator &operator=(const SparseTensorIterator &) = delete; - - /// Gets the next element. If there are no remaining elements, then - /// returns nullptr. - const Element *getNext() { return it < end ? &*it++ : nullptr; } - -private: - const SparseTensorCOO *const coo; // Owning pointer. - typename SparseTensorCOO::const_iterator it; - const typename SparseTensorCOO::const_iterator end; -}; - -//===----------------------------------------------------------------------===// -// -// Utilities for manipulating `StridedMemRefType`. -// -//===----------------------------------------------------------------------===// - -// We shouldn't need to use `detail::safelyEQ` here since the `1` is a literal. #define ASSERT_NO_STRIDE(MEMREF) \ do { \ assert((MEMREF) && "Memref is nullptr"); \ assert(((MEMREF)->strides[0] == 1) && "Memref has non-trivial stride"); \ } while (false) -// All our functions use `uint64_t` for ranks, but `StridedMemRefType::sizes` -// uses `int64_t` on some platforms. 
So we explicitly cast this lookup to -// ensure we get a consistent type, and we use `checkOverflowCast` rather -// than `static_cast` just to be extremely sure that the casting can't -// go awry. (The cast should aways be safe since (1) sizes should never -// be negative, and (2) the maximum `int64_t` is smaller than the maximum -// `uint64_t`. But it's better to be safe than sorry.) #define MEMREF_GET_USIZE(MEMREF) \ detail::checkOverflowCast((MEMREF)->sizes[0]) @@ -137,22 +84,13 @@ class SparseTensorIterator final { #define MEMREF_GET_PAYLOAD(MEMREF) ((MEMREF)->data + (MEMREF)->offset) -/// Initializes the memref with the provided size and data pointer. This +/// Initializes the memref with the provided size and data pointer. This /// is designed for functions which want to "return" a memref that aliases /// into memory owned by some other object (e.g., `SparseTensorStorage`), /// without doing any actual copying. (The "return" is in scarequotes /// because the `_mlir_ciface_` calling convention migrates any returned /// memrefs into an out-parameter passed before all the other function /// parameters.) -/// -/// We make this a function rather than a macro mainly for type safety -/// reasons. This function does not modify the data pointer, but it -/// cannot be marked `const` because it is stored into the (necessarily) -/// non-`const` memref. This function is templated over the `DataSizeT` -/// to work around signedness warnings due to many data types having -/// varying signedness across different platforms. The templating allows -/// this function to ensure that it does the right thing and never -/// introduces errors due to implicit conversions. 
template static inline void aliasIntoMemref(DataSizeT size, T *data, StridedMemRefType &ref) { @@ -200,20 +138,11 @@ extern "C" { dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, \ dimRank, tensor); \ } \ - case Action::kFuture: { \ - break; \ - } \ case Action::kToCOO: { \ assert(ptr && "Received nullptr for SparseTensorStorage object"); \ auto &tensor = *static_cast *>(ptr); \ return tensor.toCOO(lvlRank, lvlSizes, dimRank, dim2lvl, lvl2dim); \ } \ - case Action::kToIterator: { \ - assert(ptr && "Received nullptr for SparseTensorStorage object"); \ - auto &tensor = *static_cast *>(ptr); \ - auto *coo = tensor.toCOO(lvlRank, lvlSizes, dimRank, dim2lvl, lvl2dim); \ - return new SparseTensorIterator(coo); \ - } \ case Action::kPack: { \ assert(ptr && "Received nullptr for SparseTensorStorage object"); \ intptr_t *buffers = static_cast(ptr); \ @@ -372,7 +301,6 @@ void *_mlir_ciface_newSparseTensor( // NOLINT CASE_SECSAME(OverheadType::kU64, PrimaryType::kC32, uint64_t, complex32); // Unsupported case (add above if needed). - // TODO: better pretty-printing of enum values! MLIR_SPARSETENSOR_FATAL( "unsupported combination of types: \n", static_cast(posTp), static_cast(crdTp), @@ -428,29 +356,6 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) MLIR_SPARSETENSOR_FOREVERY_V(IMPL_FORWARDINGINSERT) #undef IMPL_FORWARDINGINSERT -// NOTE: the `cref` argument uses the same coordinate-space as the `iter` -// (which can be either dim- or lvl-coords, depending on context). 
-#define IMPL_GETNEXT(VNAME, V) \ - bool _mlir_ciface_getNext##VNAME(void *iter, \ - StridedMemRefType *cref, \ - StridedMemRefType *vref) { \ - assert(iter &&vref); \ - ASSERT_NO_STRIDE(cref); \ - index_type *coords = MEMREF_GET_PAYLOAD(cref); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - const uint64_t rank = MEMREF_GET_USIZE(cref); \ - const Element *elem = \ - static_cast *>(iter)->getNext(); \ - if (elem == nullptr) \ - return false; \ - for (uint64_t d = 0; d < rank; d++) \ - coords[d] = elem->coords[d]; \ - *value = elem->value; \ - return true; \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_LEXINSERT(VNAME, V) \ void _mlir_ciface_lexInsert##VNAME( \ void *t, StridedMemRefType *lvlCoordsRef, \ @@ -636,7 +541,6 @@ void *_mlir_ciface_newSparseTensorFromReader( CASE_SECSAME(kU64, kC32, uint64_t, complex32); // Unsupported case (add above if needed). - // TODO: better pretty-printing of enum values! MLIR_SPARSETENSOR_FATAL( "unsupported combination of types: \n", static_cast(posTp), static_cast(crdTp), @@ -701,7 +605,7 @@ void endLexInsert(void *tensor) { #define IMPL_OUTSPARSETENSOR(VNAME, V) \ void outSparseTensor##VNAME(void *coo, void *dest, bool sort) { \ - assert(coo && "Got nullptr for COO object"); \ + assert(coo); \ auto &coo_ = *static_cast *>(coo); \ if (sort) \ coo_.sort(); \ @@ -721,13 +625,6 @@ void delSparseTensor(void *tensor) { MLIR_SPARSETENSOR_FOREVERY_V(IMPL_DELCOO) #undef IMPL_DELCOO -#define IMPL_DELITER(VNAME, V) \ - void delSparseTensorIterator##VNAME(void *iter) { \ - delete static_cast *>(iter); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_DELITER) -#undef IMPL_DELITER - char *getTensorFilename(index_type id) { constexpr size_t BUF_SIZE = 80; char var[BUF_SIZE]; From 398e48a75ba1ce7f2d42c0260f28218936c47073 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Fri, 13 Oct 2023 05:54:30 +0000 Subject: [PATCH 052/720] [mlir] Apply ClangTidy fix (NFC) Prefer to use .empty() instead of checking size() == 0. 
--- mlir/lib/AsmParser/AttributeParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/AsmParser/AttributeParser.cpp b/mlir/lib/AsmParser/AttributeParser.cpp index b1991ce06f6ea..8366c18760fd6 100644 --- a/mlir/lib/AsmParser/AttributeParser.cpp +++ b/mlir/lib/AsmParser/AttributeParser.cpp @@ -309,7 +309,7 @@ ParseResult Parser::parseAttributeDict(NamedAttrList &attributes) { else return emitWrongTokenError("expected attribute name"); - if (nameId->size() == 0) + if (nameId->empty()) return emitError("expected valid attribute name"); if (!seenKeys.insert(*nameId).second) From 60b3e05967ff5f6cbb7b9dea32395ed0799f3bdd Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 12 Oct 2023 22:58:42 -0700 Subject: [PATCH 053/720] [ELF] Restore the --call-graph-profile-sort=hfsort default before #68638 The high time complexity of cache-directed sort is a real issue and is not appropriate as the default, at least for now (https://github.com/llvm/llvm-project/pull/68638#issuecomment-1760918891). --- lld/ELF/Driver.cpp | 2 +- lld/docs/ld.lld.1 | 4 ++-- lld/test/ELF/cgprofile-txt.s | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index f581529427d7f..d082463d34e57 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1095,7 +1095,7 @@ static void ltoValidateAllVtablesHaveTypeInfos(opt::InputArgList &args) { } static CGProfileSortKind getCGProfileSortKind(opt::InputArgList &args) { - StringRef s = args.getLastArgValue(OPT_call_graph_profile_sort, "cdsort"); + StringRef s = args.getLastArgValue(OPT_call_graph_profile_sort, "hfsort"); if (s == "hfsort") return CGProfileSortKind::Hfsort; if (s == "cdsort") diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index 12b17dd37796d..2e46fc18132f3 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -128,9 +128,9 @@ may be: .It Cm none Ignore call graph profile. .It Cm hfsort -Use hfsort. +Use hfsort (default). 
.It Cm cdsort -Use cdsort (default). +Use cdsort. .El .Pp .It Fl -color-diagnostics Ns = Ns Ar value diff --git a/lld/test/ELF/cgprofile-txt.s b/lld/test/ELF/cgprofile-txt.s index cf5b17627cfb6..c9194bbbc43cb 100644 --- a/lld/test/ELF/cgprofile-txt.s +++ b/lld/test/ELF/cgprofile-txt.s @@ -26,12 +26,12 @@ # RUN: echo "TooManyPreds10 TooManyPreds 11" >> %t.call_graph # RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=hfsort -o %t2 # RUN: llvm-readobj --symbols %t2 | FileCheck %s +## --call-graph-profile-sort=hfsort is the default. +# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2b +# RUN: cmp %t2 %t2b # RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=cdsort -o %t2 # RUN: llvm-readobj --symbols %t2 | FileCheck %s --check-prefix=CDSORT -## --call-graph-profile-sort=cdsort is the default. -# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2b -# RUN: cmp %t2 %t2b # RUN: not ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=sort \ # RUN: -o /dev/null 2>&1 | FileCheck %s --check-prefix=UNKNOWN From 9bcc094d376705e3dcfdd6fe2c71bb5456746b08 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 12 Oct 2023 22:59:25 -0700 Subject: [PATCH 054/720] [llvm] Use llvm::erase_if (NFC) --- .../LogicalView/Readers/LVCodeViewVisitor.cpp | 11 ++++------- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 4 +--- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 8 +++----- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp index 80b644103fefd..1d01785328825 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp @@ -465,13 +465,10 @@ LVScope *LVNamespaceDeduction::get(LVStringRefs Components) { LVScope 
*LVNamespaceDeduction::get(StringRef ScopedName, bool CheckScope) { LVStringRefs Components = getAllLexicalComponents(ScopedName); if (CheckScope) - Components.erase(std::remove_if(Components.begin(), Components.end(), - [&](StringRef Component) { - LookupSet::iterator Iter = - IdentifiedNamespaces.find(Component); - return Iter == IdentifiedNamespaces.end(); - }), - Components.end()); + llvm::erase_if(Components, [&](StringRef Component) { + LookupSet::iterator Iter = IdentifiedNamespaces.find(Component); + return Iter == IdentifiedNamespaces.end(); + }); LLVM_DEBUG( { dbgs() << formatv("ScopedName: '{0}'\n", ScopedName.str().c_str()); }); diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 93db983b92c05..a679699a66c75 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2829,9 +2829,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { return Var == DbgVar; }; - InstrVec.erase( - std::remove_if(InstrVec.begin(), InstrVec.end(), IsDbgVar), - InstrVec.end()); + llvm::erase_if(InstrVec, IsDbgVar); } forEachDbgRegOperand(Instr, [&](MachineOperand &Op) { Op.setReg(0); }); diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 8c3ff399621a8..f2b9d784ead8a 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1977,11 +1977,9 @@ void JumpThreadingPass::updateSSA( // Find debug values outside of the block findDbgValues(DbgValues, &I); - DbgValues.erase(remove_if(DbgValues, - [&](const DbgValueInst *DbgVal) { - return DbgVal->getParent() == BB; - }), - DbgValues.end()); + llvm::erase_if(DbgValues, [&](const DbgValueInst *DbgVal) { + return DbgVal->getParent() == BB; + }); // If there are no uses outside the block, we're done with this instruction. 
if (UsesToRename.empty() && DbgValues.empty()) From 7755cdf03d2f2dce652398ada012377186c292d3 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Fri, 13 Oct 2023 08:34:53 +0200 Subject: [PATCH 055/720] [flang][runtime] Fix IsContiguous for zero and one element arrays (#68869) The byte strides in zero and one element array descriptor may not be perfect multiple of the element size and previous and extents. IsContiguous and its CFI equivalent should still return true for such arrays (Fortran 2018 standards says in 8.5.7 that an array is not contiguous if it has two or more elements and ....). --- flang/include/flang/Runtime/descriptor.h | 10 +- flang/runtime/ISO_Fortran_binding.cpp | 13 ++- .../Evaluate/ISO-Fortran-binding.cpp | 97 ++++++++++++++++++- 3 files changed, 111 insertions(+), 9 deletions(-) diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h index c9a3b1b031007..c69bb336dd29e 100644 --- a/flang/include/flang/Runtime/descriptor.h +++ b/flang/include/flang/Runtime/descriptor.h @@ -390,14 +390,16 @@ class Descriptor { if (leadingDimensions > raw_.rank) { leadingDimensions = raw_.rank; } + bool stridesAreContiguous{true}; for (int j{0}; j < leadingDimensions; ++j) { const Dimension &dim{GetDimension(j)}; - if (bytes != dim.ByteStride()) { - return false; - } + stridesAreContiguous &= bytes == dim.ByteStride(); bytes *= dim.Extent(); } - return true; + // One and zero element arrays are contiguous even if the descriptor + // byte strides are not perfect multiples. + return stridesAreContiguous || bytes == 0 || + bytes == static_cast(ElementBytes()); } // Establishes a pointer to a section or element. 
diff --git a/flang/runtime/ISO_Fortran_binding.cpp b/flang/runtime/ISO_Fortran_binding.cpp index 15743be88d1be..103413cb7140a 100644 --- a/flang/runtime/ISO_Fortran_binding.cpp +++ b/flang/runtime/ISO_Fortran_binding.cpp @@ -125,14 +125,19 @@ RT_API_ATTRS int CFI_establish(CFI_cdesc_t *descriptor, void *base_addr, } RT_API_ATTRS int CFI_is_contiguous(const CFI_cdesc_t *descriptor) { + bool stridesAreContiguous{true}; CFI_index_t bytes = descriptor->elem_len; for (int j{0}; j < descriptor->rank; ++j) { - if (bytes != descriptor->dim[j].sm) { - return 0; - } + stridesAreContiguous &= bytes == descriptor->dim[j].sm; bytes *= descriptor->dim[j].extent; } - return 1; + // One and zero element arrays are contiguous even if the descriptor + // byte strides are not perfect multiples. + if (stridesAreContiguous || bytes == 0 || + bytes == static_cast(descriptor->elem_len)) { + return 1; + } + return 0; } RT_API_ATTRS int CFI_section(CFI_cdesc_t *result, const CFI_cdesc_t *source, diff --git a/flang/unittests/Evaluate/ISO-Fortran-binding.cpp b/flang/unittests/Evaluate/ISO-Fortran-binding.cpp index 09a51e6cea10b..d1f0a31454056 100644 --- a/flang/unittests/Evaluate/ISO-Fortran-binding.cpp +++ b/flang/unittests/Evaluate/ISO-Fortran-binding.cpp @@ -643,13 +643,108 @@ static void run_CFI_setpointer_tests() { } } +static void run_CFI_is_contiguous_tests() { + // INTEGER :: A(0:3,0:3) + constexpr CFI_rank_t rank{2}; + CFI_index_t extents[rank] = {4, 4}; + CFI_CDESC_T(rank) dv_storage; + CFI_cdesc_t *dv{&dv_storage}; + Descriptor *dvDesc{reinterpret_cast(dv)}; + char base; + void *base_addr{&base}; + int retCode{CFI_establish(dv, base_addr, CFI_attribute_other, CFI_type_int, + /*elem_len=*/0, rank, extents)}; + MATCH(retCode == CFI_SUCCESS, true); + + MATCH(true, CFI_is_contiguous(dv) == 1); + MATCH(true, dvDesc->IsContiguous()); + + CFI_CDESC_T(rank) sectionDescriptorStorage; + CFI_cdesc_t *section{§ionDescriptorStorage}; + Descriptor *sectionDesc{reinterpret_cast(section)}; + 
retCode = CFI_establish(section, base_addr, CFI_attribute_other, CFI_type_int, + /*elem_len=*/0, rank, extents); + MATCH(retCode == CFI_SUCCESS, true); + + // Test empty section B = A(0:3:2,0:3:-2) is contiguous. + CFI_index_t lb[rank] = {0, 0}; + CFI_index_t ub[rank] = {3, 3}; + CFI_index_t strides[rank] = {2, -2}; + retCode = CFI_section(section, dv, lb, ub, strides); + MATCH(true, retCode == CFI_SUCCESS); + MATCH(true, CFI_is_contiguous(section) == 1); + MATCH(true, sectionDesc->IsContiguous()); + + // Test 1 element section B = A(0:1:2,0:1:2) is contiguous. + lb[0] = 0; + lb[1] = 0; + ub[0] = 1; + ub[1] = 1; + strides[0] = 2; + strides[1] = 2; + retCode = CFI_section(section, dv, lb, ub, strides); + MATCH(true, retCode == CFI_SUCCESS); + MATCH(true, CFI_is_contiguous(section) == 1); + MATCH(true, sectionDesc->IsContiguous()); + + // Test section B = A(0:3:1,0:2:1) is contiguous. + lb[0] = 0; + lb[1] = 0; + ub[0] = 3; + ub[1] = 2; + strides[0] = 1; + strides[1] = 1; + retCode = CFI_section(section, dv, lb, ub, strides); + sectionDesc->Dump(); + MATCH(true, retCode == CFI_SUCCESS); + MATCH(true, CFI_is_contiguous(section) == 1); + MATCH(true, sectionDesc->IsContiguous()); + + // Test section B = A(0:2:1,0:2:1) is not contiguous. + lb[0] = 0; + lb[1] = 0; + ub[0] = 2; + ub[1] = 2; + strides[0] = 1; + strides[1] = 1; + retCode = CFI_section(section, dv, lb, ub, strides); + sectionDesc->Dump(); + MATCH(true, retCode == CFI_SUCCESS); + MATCH(true, CFI_is_contiguous(section) == 0); + MATCH(false, sectionDesc->IsContiguous()); + + // Test section B = A(0:3:2,0:3:1) is not contiguous. + lb[0] = 0; + lb[1] = 0; + ub[0] = 3; + ub[1] = 3; + strides[0] = 2; + strides[1] = 1; + retCode = CFI_section(section, dv, lb, ub, strides); + MATCH(true, retCode == CFI_SUCCESS); + MATCH(true, CFI_is_contiguous(section) == 0); + MATCH(false, sectionDesc->IsContiguous()); + + // Test section B = A(0:3:1,0:3:2) is not contiguous. 
+ lb[0] = 0; + lb[1] = 0; + ub[0] = 3; + ub[1] = 3; + strides[0] = 1; + strides[1] = 2; + retCode = CFI_section(section, dv, lb, ub, strides); + MATCH(true, retCode == CFI_SUCCESS); + MATCH(true, CFI_is_contiguous(section) == 0); + MATCH(false, sectionDesc->IsContiguous()); +} + int main() { TestCdescMacroForAllRanksSmallerThan(); run_CFI_establish_tests(); run_CFI_address_tests(); run_CFI_allocate_tests(); // TODO: test CFI_deallocate - // TODO: test CFI_is_contiguous + run_CFI_is_contiguous_tests(); run_CFI_section_tests(); run_CFI_select_part_tests(); run_CFI_setpointer_tests(); From 411ceacf4351bd3af9db75b859063864b19e71e1 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 13 Oct 2023 15:19:35 +0800 Subject: [PATCH 056/720] [Clang] Fix tautological assertion in `Sema::CheckX86BuiltinTileDuplicate` Closes #68958. --- clang/lib/Sema/SemaChecking.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 1b2f8cf296d16..cd61459cfbb13 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -6047,7 +6047,7 @@ bool Sema::CheckX86BuiltinTileDuplicate(CallExpr *TheCall, if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) return true; int ArgExtValue = Result.getExtValue(); - assert((ArgExtValue >= TileRegLow || ArgExtValue <= TileRegHigh) && + assert((ArgExtValue >= TileRegLow && ArgExtValue <= TileRegHigh) && "Incorrect tile register num."); if (ArgValues.test(ArgExtValue)) return Diag(TheCall->getBeginLoc(), From 47000586caca4424e88372d8ab4f8b2c0178ee4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 13 Oct 2023 10:28:46 +0300 Subject: [PATCH 057/720] [llvm-remarkutil] Fix building with Xcode 12 This fixes erorrs like these: llvm-project/llvm/tools/llvm-remarkutil/RemarkCounter.h:90:14: error: call to deleted constructor of 'llvm::Error' return E; ^ llvm-project/llvm/include/llvm/Support/Error.h:189:3: note: 'Error' has 
been explicitly marked deleted here Error(const Error &Other) = delete; ^ llvm-project/llvm/include/llvm/Support/Error.h:496:18: note: passing argument to parameter 'Err' here Expected(Error Err) ^ --- llvm/tools/llvm-remarkutil/RemarkCounter.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/tools/llvm-remarkutil/RemarkCounter.h b/llvm/tools/llvm-remarkutil/RemarkCounter.h index 89cd3f7388d07..54bba8d7cc995 100644 --- a/llvm/tools/llvm-remarkutil/RemarkCounter.h +++ b/llvm/tools/llvm-remarkutil/RemarkCounter.h @@ -87,7 +87,7 @@ struct Filters { Filter.ArgFilter = std::move(ArgFilter); Filter.RemarkTypeFilter = std::move(RemarkTypeFilter); if (auto E = Filter.regexArgumentsValid()) - return E; + return std::move(E); return Filter; } /// Returns true if \p Remark satisfies all the provided filters. @@ -165,11 +165,11 @@ struct ArgumentCounter : Counter { for (auto &Arg : Arguments) { if (Arg.IsRegex) { if (auto E = checkRegex(Arg.FilterRE)) - return E; + return std::move(E); } } if (auto E = AC.getAllMatchingArgumentsInRemark(Buffer, Arguments, Filter)) - return E; + return std::move(E); return AC; } From 4f4694509d293bf715dcbf0df339c801a044e114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 13 Oct 2023 10:33:17 +0300 Subject: [PATCH 058/720] [llvm-remarkutil] Silence a GCC warning about missing returns after a fully covered switch. NFC. 
--- llvm/tools/llvm-remarkutil/RemarkCounter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp index fa05f4fda95fb..8bde0b8830182 100644 --- a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp +++ b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp @@ -181,6 +181,7 @@ std::optional Counter::getGroupByKey(const Remark &Remark) { return Remark.Loc->SourceFilePath.str() + ":" + Remark.FunctionName.str(); return Remark.Loc->SourceFilePath.str(); } + llvm_unreachable("Fully covered switch above!"); } void ArgumentCounter::collect(const Remark &Remark) { From ac32d7b87f4d4b546eea96b9b722e88fdb3a5b49 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 13 Oct 2023 09:36:34 +0200 Subject: [PATCH 059/720] Revert "Add Documentation for Execution Results Handling in Clang-Repl (#65650)" This reverts commit b9b8fc4878b6f7708f2ca2df6036a9c7bb5077b0. This uses a function defined in LLVM's config-ix inside clang. config-ix is a non-exported cmake module, so this is a layering violation. 
--- clang/docs/CMakeLists.txt | 7 - clang/docs/ClangRepl.rst | 405 -------------------------------------- clang/docs/conf.py | 2 +- 3 files changed, 1 insertion(+), 413 deletions(-) diff --git a/clang/docs/CMakeLists.txt b/clang/docs/CMakeLists.txt index 356814f994c32..4163dd2d90ad5 100644 --- a/clang/docs/CMakeLists.txt +++ b/clang/docs/CMakeLists.txt @@ -103,13 +103,6 @@ function (gen_rst_file_from_td output_file td_option source docs_targets) endfunction() if (LLVM_ENABLE_SPHINX) - llvm_find_program(dot) - if (HAVE_DOT) - set(DOT ${LLVM_PATH_DOT}) - else() - message(FATAL_ERROR "Cannot find DOT") - endif() - include(AddSphinxTarget) if (SPHINX_FOUND AND (${SPHINX_OUTPUT_HTML} OR ${SPHINX_OUTPUT_MAN})) # Copy rst files to build directory before generating the html diff --git a/clang/docs/ClangRepl.rst b/clang/docs/ClangRepl.rst index 5399036c123fb..aaaabd99bc82f 100644 --- a/clang/docs/ClangRepl.rst +++ b/clang/docs/ClangRepl.rst @@ -213,411 +213,6 @@ concept helps support advanced use cases such as template instantiations on dema automatic language interoperability. It also helps static languages such as C/C++ become apt for data science. -Execution Results Handling in Clang-Repl -======================================== - -Execution Results Handling features discussed below help extend the Clang-Repl -functionality by creating an interface between the execution results of a -program and the compiled program. - -1. **Capture Execution Results**: This feature helps capture the execution results -of a program and bring them back to the compiled program. - -2. **Dump Captured Execution Results**: This feature helps create a temporary dump -for Value Printing/Automatic Printf, that is, to display the value and type of -the captured data. - - -1. Capture Execution Results -============================ - -In many cases, it is useful to bring back the program execution result to the -compiled program. This result can be stored in an object of type **Value**. 
- -How Execution Results are captured (Value Synthesis): ------------------------------------------------------ - -The synthesizer chooses which expression to synthesize, and then it replaces -the original expression with the synthesized expression. Depending on the -expression type, it may choose to save an object (``LastValue``) of type 'value' -while allocating memory to it (``SetValueWithAlloc()``), or not ( -``SetValueNoAlloc()``). - -.. graphviz:: - :name: valuesynthesis - :caption: Value Synthesis - :alt: Shows how an object of type 'Value' is synthesized - :align: center - - digraph "valuesynthesis" { - rankdir="LR"; - graph [fontname="Verdana", fontsize="12"]; - node [fontname="Verdana", fontsize="12"]; - edge [fontname="Sans", fontsize="9"]; - - start [label=" Create an Object \n 'Last Value' \n of type 'Value' ", shape="note", fontcolor=white, fillcolor="#3333ff", style=filled]; - assign [label=" Assign the result \n to the 'LastValue' \n (based on respective \n Memory Allocation \n scenario) ", shape="box"] - print [label=" Pretty Print \n the Value Object ", shape="Msquare", fillcolor="yellow", style=filled]; - start -> assign; - assign -> print; - - subgraph SynthesizeExpression { - synth [label=" SynthesizeExpr() ", shape="note", fontcolor=white, fillcolor="#3333ff", style=filled]; - mem [label=" New Memory \n Allocation? ", shape="diamond"]; - withaloc [label=" SetValueWithAlloc() ", shape="box"]; - noaloc [label=" SetValueNoAlloc() ", shape="box"]; - right [label=" 1. RValue Structure \n (a temporary value)", shape="box"]; - left2 [label=" 2. LValue Structure \n (a variable with \n an address)", shape="box"]; - left3 [label=" 3. 
Built-In Type \n (int, float, etc.)", shape="box"]; - output [label=" move to 'Assign' step ", shape="box"]; - - synth -> mem; - mem -> withaloc [label="Yes"]; - mem -> noaloc [label="No"]; - withaloc -> right; - noaloc -> left2; - noaloc -> left3; - right -> output; - left2 -> output; - left3 -> output; - } - output -> assign - } - -Where is the captured result stored? ------------------------------------- - -``LastValue`` holds the last result of the value printing. It is a class member -because it can be accessed even after subsequent inputs. - -**Note:** If no value printing happens, then it is in an invalid state. - -Improving Efficiency and User Experience ----------------------------------------- - -The Value object is essentially used to create a mapping between an expression -'type' and the allocated 'memory'. Built-in types (bool, char, int, -float, double, etc.) are copyable. Their memory allocation size is known -and the Value object can introduce a small-buffer optimization. -In case of objects, the ``Value`` class provides reference-counted memory -management. - -The implementation maps the type as written and the Clang Type to be able to use -the preprocessor to synthesize the relevant cast operations. For example, -``X(char, Char_S)``, where ``char`` is the type from the language's type system -and ``Char_S`` is the Clang builtin type which represents it. This mapping helps -to import execution results from the interpreter in a compiled program and vice -versa. The ``Value.h`` header file can be included at runtime and this is why it -has a very low token count and was developed with strict constraints in mind. - -This also enables the user to receive the computed 'type' back in their code -and then transform the type into something else (e.g., re-cast a double into -a float). 
Normally, the compiler can handle these conversions transparently, -but in interpreter mode, the compiler cannot see all the 'from' and 'to' types, -so it cannot implicitly do the conversions. So this logic enables providing -these conversions on request. - -On-request conversions can help improve the user experience, by allowing -conversion to a desired 'to' type, when the 'from' type is unknown or unclear. - -Significance of this Feature ----------------------------- - -The 'Value' object enables wrapping a memory region that comes from the -JIT, and bringing it back to the compiled code (and vice versa). -This is a very useful functionality when: - -- connecting an interpreter to the compiled code, or -- connecting an interpreter in another language. - -For example, this feature helps transport values across boundaries. A notable -example is the cppyy project code makes use of this feature to enable running C++ -within Python. It enables transporting values/information between C++ -and Python. - -Note: `cppyy `_ is an automatic, run-time, -Python-to-C++ bindings generator, for calling C++ from Python and Python from C++. -It uses LLVM along with a C++ interpreter (e.g., Cling) to enable features like -run-time instantiation of C++ templates, cross-inheritance, callbacks, -auto-casting, transparent use of smart pointers, etc. - -In a nutshell, this feature enables a new way of developing code, paving the -way for language interoperability and easier interactive programming. - -Implementation Details -====================== - -Interpreter as a REPL vs. as a Library --------------------------------------- - -1 - If we're using the interpreter in interactive (REPL) mode, it will dump -the value (i.e., value printing). - -.. code-block:: console - - if (LastValue.isValid()) { - if (!V) { - LastValue.dump(); - LastValue.clear(); - } else - *V = std::move(LastValue); - } - - -2 - If we're using the interpreter as a library, then it will pass the value -to the user. 
- -Incremental AST Consumer ------------------------- - -The ``IncrementalASTConsumer`` class wraps the original code generator -``ASTConsumer`` and it performs a hook, to traverse all the top-level decls, to -look for expressions to synthesize, based on the ``isSemiMissing()`` condition. - -If this condition is found to be true, then ``Interp.SynthesizeExpr()`` will be -invoked. - -**Note:** Following is a sample code snippet. Actual code may vary over time. - -.. code-block:: console - - for (Decl *D : DGR) - if (auto *TSD = llvm::dyn_cast(D); - TSD && TSD->isSemiMissing()) - TSD->setStmt(Interp.SynthesizeExpr(cast(TSD->getStmt()))); - - return Consumer->HandleTopLevelDecl(DGR); - -The synthesizer will then choose the relevant expression, based on its type. - -Communication between Compiled Code and Interpreted Code --------------------------------------------------------- - -In Clang-Repl there is **interpreted code**, and this feature adds a 'value' -runtime that can talk to the **compiled code**. - -Following is an example where the compiled code interacts with the interpreter -code. The execution results of an expression are stored in the object 'V' of -type Value. This value is then printed, effectively helping the interpreter -use a value from the compiled code. - -.. code-block:: console - - int Global = 42; - void setGlobal(int val) { Global = val; } - int getGlobal() { return Global; } - Interp.ParseAndExecute(“void setGlobal(int val);”); - Interp.ParseAndExecute(“int getGlobal();”); - Value V; - Interp.ParseAndExecute(“getGlobal()”, &V); - std::cout << V.getAs() << “\n”; // Prints 42 - - -**Note:** Above is an example of interoperability between the compiled code and -the interpreted code. Interoperability between languages (e.g., C++ and Python) -works similarly. - - -2. Dump Captured Execution Results -================================== - -This feature helps create a temporary dump to display the value and type -(pretty print) of the desired data. 
This is a good way to interact with the -interpreter during interactive programming. - -How value printing is simplified (Automatic Printf) ---------------------------------------------------- - -The ``Automatic Printf`` feature makes it easy to display variable values during -program execution. Using the ``printf`` function repeatedly is not required. -This is achieved using an extension in the ``libclangInterpreter`` library. - -To automatically print the value of an expression, simply write the expression -in the global scope **without a semicolon**. - -.. graphviz:: - :name: automaticprintf - :caption: Automatic PrintF - :alt: Shows how Automatic PrintF can be used - :align: center - - digraph "AutomaticPrintF" { - size="6,4"; - rankdir="LR"; - graph [fontname="Verdana", fontsize="12"]; - node [fontname="Verdana", fontsize="12"]; - edge [fontname="Sans", fontsize="9"]; - - manual [label=" Manual PrintF ", shape="box"]; - int1 [label=" int ( &) 42 ", shape="box"] - auto [label=" Automatic PrintF ", shape="box"]; - int2 [label=" int ( &) 42 ", shape="box"] - - auto -> int2 [label="int x = 42; \n x"]; - manual -> int1 [label="int x = 42; \n printf("(int &) %d \\n", x);"]; - } - - -Significance of this feature ----------------------------- - -Inspired by a similar implementation in `Cling `_, -this feature added to upstream Clang repo has essentially extended the syntax of -C++, so that it can be more helpful for people that are writing code for data -science applications. - -This is useful, for example, when you want to experiment with a set of values -against a set of functions, and you'd like to know the results right away. -This is similar to how Python works (hence its popularity in data science -research), but the superior performance of C++, along with this flexibility -makes it a more attractive option. 
- -Implementation Details -====================== - -Parsing mechanism: ------------------- - -The Interpreter in Clang-Repl (``Interpreter.cpp``) includes the function -``ParseAndExecute()`` that can accept a 'Value' parameter to capture the result. -But if the value parameter is made optional and it is omitted (i.e., that the -user does not want to utilize it elsewhere), then the last value can be -validated and pushed into the ``dump()`` function. - -.. graphviz:: - :name: parsing - :caption: Parsing Mechanism - :alt: Shows the Parsing Mechanism for Pretty Printing - :align: center - - - digraph "prettyprint" { - rankdir="LR"; - graph [fontname="Verdana", fontsize="12"]; - node [fontname="Verdana", fontsize="12"]; - edge [fontname="Verdana", fontsize="9"]; - - parse [label=" ParseAndExecute() \n in Clang ", shape="box"]; - capture [label=" Capture 'Value' parameter \n for processing? ", shape="diamond"]; - use [label=" Use for processing ", shape="box"]; - dump [label=" Validate and push \n to dump()", shape="box"]; - callp [label=" call print() function ", shape="box"]; - type [label=" Print the Type \n ReplPrintTypeImpl()", shape="box"]; - data [label=" Print the Data \n ReplPrintDataImpl() ", shape="box"]; - output [label=" Output Pretty Print \n to the user ", shape="box", fontcolor=white, fillcolor="#3333ff", style=filled]; - - parse -> capture [label="Optional 'Value' Parameter"]; - capture -> use [label="Yes"]; - use -> End; - capture -> dump [label="No"]; - dump -> callp; - callp -> type; - callp -> data; - type -> output; - data -> output; - } - -**Note:** Following is a sample code snippet. Actual code may vary over time. - -.. 
code-block:: console - - llvm::Error Interpreter::ParseAndExecute(llvm::StringRef Code, Value *V) { - - auto PTU = Parse(Code); - if (!PTU) - return PTU.takeError(); - if (PTU->TheModule) - if (llvm::Error Err = Execute(*PTU)) - return Err; - - if (LastValue.isValid()) { - if (!V) { - LastValue.dump(); - LastValue.clear(); - } else - *V = std::move(LastValue); - } - return llvm::Error::success(); - } - -The ``dump()`` function (in ``value.cpp``) calls the ``print()`` function. - -Printing the Data and Type are handled in their respective functions: -``ReplPrintDataImpl()`` and ``ReplPrintTypeImpl()``. - -Annotation Token (annot_repl_input_end) ---------------------------------------- - -This feature uses a new token (``annot_repl_input_end``) to consider printing the -value of an expression if it doesn't end with a semicolon. When parsing an -Expression Statement, if the last semicolon is missing, then the code will -pretend that there one and set a marker there for later utilization, and -continue parsing. - -A semicolon is normally required in C++, but this feature expands the C++ -syntax to handle cases where a missing semicolon is expected (i.e., when -handling an expression statement). It also makes sure that an error is not -generated for the missing semicolon in this specific case. - -This is accomplished by identifying the end position of the user input -(expression statement). This helps store and return the expression statement -effectively, so that it can be printed (displayed to the user automatically). - -**Note:** This logic is only available for C++ for now, since part of the -implementation itself requires C++ features. Future versions may support more -languages. - -.. code-block:: console - - Token *CurTok = nullptr; - // If the semicolon is missing at the end of REPL input, consider if - // we want to do value printing. Note this is only enabled in C++ mode - // since part of the implementation requires C++ language features. 
- // Note we shouldn't eat the token since the callback needs it. - if (Tok.is(tok::annot_repl_input_end) && Actions.getLangOpts().CPlusPlus) - CurTok = &Tok; - else - // Otherwise, eat the semicolon. - ExpectAndConsumeSemi(diag::err_expected_semi_after_expr); - - StmtResult R = handleExprStmt(Expr, StmtCtx); - if (CurTok && !R.isInvalid()) - CurTok->setAnnotationValue(R.get()); - - return R; - } - -AST Transformation -------------------- - -When Sema encounters the ``annot_repl_input_end`` token, it knows to transform -the AST before the real CodeGen process. It will consume the token and set a -'semi missing' bit in the respective decl. - -.. code-block:: console - - if (Tok.is(tok::annot_repl_input_end) && - Tok.getAnnotationValue() != nullptr) { - ConsumeAnnotationToken(); - cast(DeclsInGroup.back())->setSemiMissing(); - } - -In the AST Consumer, traverse all the Top Level Decls, to look for expressions -to synthesize. If the current Decl is the Top Level Statement -Decl(``TopLevelStmtDecl``) and has a semicolon missing, then ask the interpreter -to synthesize another expression (an internal function call) to replace this -original expression. - - -Detailed RFC and Discussion: ----------------------------- - -For more technical details, community discussion and links to patches related -to these features, -Please visit: `RFC on LLVM Discourse `_. - -Some logic presented in the RFC (e.g. ValueGetter()) may be outdated, -compared to the final developed solution. Related Reading =============== diff --git a/clang/docs/conf.py b/clang/docs/conf.py index 31a4daa39d5b8..ca310026f53e2 100644 --- a/clang/docs/conf.py +++ b/clang/docs/conf.py @@ -27,7 +27,7 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
-extensions = ["sphinx.ext.todo", "sphinx.ext.mathjax", "sphinx.ext.graphviz"] +extensions = ["sphinx.ext.todo", "sphinx.ext.mathjax"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] From 06cd6485ae2d2f390436be53e6318fd49c442c4a Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 13 Oct 2023 00:41:35 -0700 Subject: [PATCH 060/720] [AMDGPU] Make ubsan happy (#68959) --- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index e16fed445b9f9..fa651b9fcb05a 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2243,7 +2243,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo return; } - Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? Val << 32 : Lo_32(Val); + Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32 + : Lo_32(Val); Inst.addOperand(MCOperand::createImm(Val)); setImmKindLiteral(); From 187e02fa2deda9a01563c146d7daabdaf7e5108d Mon Sep 17 00:00:00 2001 From: Maurice Heumann Date: Fri, 13 Oct 2023 09:47:47 +0200 Subject: [PATCH 061/720] [CodeGenPrepare] Check types when unmerging GEPs across indirect branches (#68587) The optimization in CodeGenPrepare, where GEPs are unmerged across indirect branches must respect the types of both GEPs and their sizes when adjusting the indices. The sample here shows the bug: https://godbolt.org/z/8e9o5sYPP The value `%elementValuePtr` addresses the second field of the `%struct.Blub`. It is therefore a GEP with index 1 and type i8. The value `%nextArrayElement` addresses the next array element. It is therefore a GEP with index 1 and type `%struct.Blub`. 
Both values point to completely different addresses, even if the indices are the same, due to the types being different. However, after CodeGenPrepare has run, `%nextArrayElement` is a bitcast from `%elementValuePtr`, meaning both were treated as equal. The cause for this is that the unmerging optimization does not take types into consideration. It sees both GEPs have `%currentArrayElement` as source operand and therefore tries to rewrite `%nextArrayElement` in terms of `%elementValuePtr`. It changes the index to the difference of the two GEPs. As both indices are `1`, the difference is `0`. As the indices are `0` the GEP is later replaced with a simple bitcast in CodeGenPrepare. Before adjusting the indices, the types of the GEPs would have to be aligned and the indices scaled accordingly for the optimization to be correct. Due to the size of the struct being `16` and the `%elementValuePtr` pointing to offset `1`, the correct index for the unmerged `%nextArrayElement` would be 15. I assume this bug emerged from the opaque pointer change as GEPs like `%elementValuePtr` that access the struct field based of type i8 did not naturally occur before. In light of future migration to ptradd, simply not performing the optimization if the types mismatch should be sufficient. 
--- llvm/lib/CodeGen/CodeGenPrepare.cpp | 2 + .../CodeGen/X86/indirect-br-gep-unmerge.ll | 51 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 llvm/test/CodeGen/X86/indirect-br-gep-unmerge.ll diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 371f6598e6b2b..187820717b6fd 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -7999,6 +7999,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, return false; if (UGEPI->getOperand(0) != GEPIOp) return false; + if (UGEPI->getSourceElementType() != GEPI->getSourceElementType()) + return false; if (GEPIIdx->getType() != cast(UGEPI->getOperand(1))->getType()) return false; diff --git a/llvm/test/CodeGen/X86/indirect-br-gep-unmerge.ll b/llvm/test/CodeGen/X86/indirect-br-gep-unmerge.ll new file mode 100644 index 0000000000000..6b953e3004256 --- /dev/null +++ b/llvm/test/CodeGen/X86/indirect-br-gep-unmerge.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -codegenprepare %s -o - | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.Blub = type { i8, i8, ptr } + +@indirectBrPtr = external hidden global ptr + +define ptr @testFunc(ptr readonly %array, i1 %skip) { +; CHECK-LABEL: define ptr @testFunc( +; CHECK-SAME: ptr readonly [[ARRAY:%.*]], i1 [[SKIP:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[SKIP]], label [[LOOPHEADER:%.*]], label [[ENDBLOCK_CLONE:%.*]] +; CHECK: loopHeader: +; CHECK-NEXT: [[CURRENTARRAYELEMENT:%.*]] = phi ptr [ [[ARRAY]], [[ENTRY:%.*]] ], [ [[NEXTARRAYELEMENT:%.*]], [[LOOPFOOTER:%.*]] ] +; CHECK-NEXT: [[ELEMENTVALUEPTR:%.*]] = getelementptr inbounds i8, ptr [[CURRENTARRAYELEMENT]], i64 1 +; CHECK-NEXT: [[ELEMENTVALUE:%.*]] = load i8, ptr [[ELEMENTVALUEPTR]], align 1 +; CHECK-NEXT: 
indirectbr ptr @indirectBrPtr, [label [[LOOPFOOTER]], label %endBlock] +; CHECK: loopFooter: +; CHECK-NEXT: [[ISGOODVALUE:%.*]] = icmp eq i8 [[ELEMENTVALUE]], 0 +; CHECK-NEXT: [[NEXTARRAYELEMENT]] = getelementptr inbounds [[STRUCT_BLUB:%.*]], ptr [[CURRENTARRAYELEMENT]], i64 1 +; CHECK-NEXT: br i1 [[ISGOODVALUE]], label [[LOOPHEADER]], label [[ENDBLOCK_CLONE]] +; CHECK: endBlock: +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[MERGE:%.*]] = phi ptr [ [[ELEMENTVALUEPTR]], [[ENDBLOCK:%.*]] ], [ [[RETVAL_CLONE:%.*]], [[ENDBLOCK_CLONE]] ] +; CHECK-NEXT: ret ptr [[MERGE]] +; CHECK: endBlock.clone: +; CHECK-NEXT: [[RETVAL_CLONE]] = phi ptr [ [[ARRAY]], [[ENTRY]] ], [ [[ELEMENTVALUEPTR]], [[LOOPFOOTER]] ] +; CHECK-NEXT: br label [[DOTSPLIT]] +; +entry: + br i1 %skip, label %loopHeader, label %endBlock + +loopHeader: + %currentArrayElement = phi ptr [ %array, %entry ], [ %nextArrayElement, %loopFooter ] + %elementValuePtr = getelementptr inbounds i8, ptr %currentArrayElement, i64 1 + %elementValue = load i8, ptr %elementValuePtr, align 1 + indirectbr ptr @indirectBrPtr, [label %loopFooter, label %endBlock] + +loopFooter: + %isGoodValue = icmp eq i8 %elementValue, 0 + %nextArrayElement = getelementptr inbounds %struct.Blub, ptr %currentArrayElement, i64 1 + br i1 %isGoodValue, label %loopHeader, label %endBlock + +endBlock: + %retVal = phi ptr [ %array, %entry ], [ %elementValuePtr, %loopFooter ], [ %elementValuePtr, %loopHeader ] + ret ptr %retVal +} From 69b6b48670c79a7440fcc273ee58420795bb9b43 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 13 Oct 2023 01:00:02 -0700 Subject: [PATCH 062/720] [ADT] Stop including identity.h (NFC) The last use of identity was removed by: commit 388b8c16c5610a54c639bb74e3c8de161e8ca1c6 Author: Benjamin Kramer Date: Wed Jan 25 01:38:28 2023 +0100 While I am at it, this patch teaches IndexedMap.h to include identity.h as it is relying on transitive includes via llvm/ADT/STLExtras.h. 
--- llvm/include/llvm/ADT/IndexedMap.h | 1 + llvm/include/llvm/ADT/STLExtras.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/IndexedMap.h b/llvm/include/llvm/ADT/IndexedMap.h index 5ac5f798269b9..b1ebbdd1bfd54 100644 --- a/llvm/include/llvm/ADT/IndexedMap.h +++ b/llvm/include/llvm/ADT/IndexedMap.h @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/identity.h" #include namespace llvm { diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index c7d417324c94f..d0b79fa91c031 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -21,7 +21,6 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/STLFunctionalExtras.h" -#include "llvm/ADT/identity.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Config/abi-breaking.h" From 9a8ff346bb20a684e8edd62035077aba06bea084 Mon Sep 17 00:00:00 2001 From: Rik Huijzer Date: Fri, 13 Oct 2023 10:09:44 +0200 Subject: [PATCH 063/720] [mlir][doc] Trim summary text during DocGen (#68477) When defining a multi-line string in tblgen, the output in the Markdown file currently contains too much whitespace and newlines for Hugo's Markdown parser. For example, for `arith.addui_extended` the tblgen ```tblgen let summary = [{ extended unsigned integer addition operation returning sum and overflow bit }]; ``` is currently converted to ```markdown _ extended unsigned integer addition operation returning sum and overflow bit _ ``` which causes the text to not be italicized (as can be seen at https://mlir.llvm.org/docs/Dialects/ArithOps/#arithaddui_extended-arithadduiextendedop). 
After this PR, the output becomes ``` _Extended unsigned integer addition operation returning sum and overflow bit_ ``` --- mlir/tools/mlir-tblgen/OpDocGen.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mlir/tools/mlir-tblgen/OpDocGen.cpp b/mlir/tools/mlir-tblgen/OpDocGen.cpp index 855f02d828418..773ad6ec198b9 100644 --- a/mlir/tools/mlir-tblgen/OpDocGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDocGen.cpp @@ -54,8 +54,9 @@ using mlir::tblgen::Operator; void mlir::tblgen::emitSummary(StringRef summary, raw_ostream &os) { if (!summary.empty()) { - char first = std::toupper(summary.front()); - llvm::StringRef rest = summary.drop_front(); + llvm::StringRef trimmed = summary.trim(); + char first = std::toupper(trimmed.front()); + llvm::StringRef rest = trimmed.drop_front(); os << "\n_" << first << rest << "_\n\n"; } } From 41418ca13323c43460806af43eff8f2384ef08f8 Mon Sep 17 00:00:00 2001 From: Paschalis Mpeis Date: Fri, 13 Oct 2023 10:12:50 +0100 Subject: [PATCH 064/720] [NFC] Fixed typo in LangRef.rst (#68789) An IR sample in LangRef had a misplaced comment delimiter. --- llvm/docs/LangRef.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 1883e9f6290b1..35123474381e7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -18357,8 +18357,8 @@ For example: .. code-block:: text - llvm.experimental.vector.splice(, , 1) ==> ; index - llvm.experimental.vector.splice(, , -3) ==> ; trailing elements + llvm.experimental.vector.splice(, , 1); ==> index + llvm.experimental.vector.splice(, , -3); ==> trailing elements Arguments: @@ -21517,8 +21517,8 @@ Examples: .. code-block:: text - llvm.experimental.vp.splice(, , 1, 2, 3) ==> ; index - llvm.experimental.vp.splice(, , -2, 3, 2) ==> ; trailing elements + llvm.experimental.vp.splice(, , 1, 2, 3); ==> index + llvm.experimental.vp.splice(, , -2, 3, 2); ==> trailing elements .. 
_int_vp_load: From 92e751d426dbc17607bc8f552325fc659f4d0f66 Mon Sep 17 00:00:00 2001 From: Jack Frankland Date: Fri, 13 Oct 2023 10:20:18 +0100 Subject: [PATCH 065/720] [mlir][linalg] Add NHWC + FHWC Img2Col (#68708) Adds the Img2Col transformation for the fhwc channel ordering in a Conv2D. Because of how the channel ordering affects the matrix dimensions in the flattened filter this results in a slightly different implementation of the actual "matrix multiplication". Instead of doing a regular row-column dot-product this arrangement requires a row-row dot product, otherwise the filter matrix would first need to be transposed. Adds a lit test to the transform dialect to check the semantics of the optimization are correct. Signed-off-by: Jack Frankland --- .../Dialect/Linalg/Transforms/Transforms.h | 8 + .../TransformOps/LinalgTransformOps.cpp | 3 + .../Transforms/ConvertConv2DToImg2Col.cpp | 150 +++++++++++++++++- .../Linalg/convert-conv2d-to-img2col.mlir | 70 ++++++++ 4 files changed, 230 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 07a192f7b8606..3597209d7f90c 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -1175,6 +1175,14 @@ FailureOr rewriteInDestinationPassingStyle(RewriterBase &rewriter, FailureOr> rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp); +/// Same as the above but for Fhwc channel orderings in the filter. In this case +/// the matrix multiplication is actually a row-wise dot-product rather than a +/// row-column dot-product. This is to avoid transposing the filter matrix which +/// would be required for a regular matrix multiplication to produce the correct +/// output dimensions. 
+FailureOr> +rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp convOp); + /// Similar to rewriteInIm2Col with linalg::Conv2DNhwcHwcfOp except there is no /// reduction among the input channels so each convolution can be a /// matrix-vector product and by transposing both input filter so channels are diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 9ce780d3d249c..8508507871d0c 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -3118,6 +3118,9 @@ DiagnosedSilenceableFailure transform::ConvertConv2DToImg2ColOp::applyToOne( .Case([&](linalg::Conv2DNhwcHwcfOp op) { return rewriteInIm2Col(rewriter, op); }) + .Case([&](linalg::Conv2DNhwcFhwcOp op) { + return rewriteInIm2Col(rewriter, op); + }) .Case([&](linalg::DepthwiseConv2DNhwcHwcOp op) { return rewriteInIm2Col(rewriter, op); }) diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp index 275e78aaa73dd..e7629d79494bd 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp @@ -494,6 +494,141 @@ rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNchwFchwOp convOp) { reshapedResult.getOperation()); } +FailureOr> +rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp convOp) { + auto inputType = cast(convOp.getInputs()[0].getType()); + auto filterType = cast(convOp.getInputs()[1].getType()); + auto outputType = cast(convOp.getOutputs()[0].getType()); + + if (!filterType.hasStaticShape()) + return rewriter.notifyMatchFailure( + convOp, "expected a static shape for the filter"); + + if (!inputType.hasStaticShape()) + return rewriter.notifyMatchFailure(convOp, + "expected a static shape for the input"); + + // TODO: Support dilation. 
+ if (!hasAllOneValues(convOp.getDilations())) + return rewriter.notifyMatchFailure(convOp, + "expected all ones for dilations"); + + MLIRContext *context = rewriter.getContext(); + Value input = convOp.getInputs()[0]; + Value filter = convOp.getInputs()[1]; + Value output = convOp.getOutputs()[0]; + + ArrayRef filterShape = filterType.getShape(); + ArrayRef outputShape = outputType.getShape(); + + int64_t n = outputShape[0]; + int64_t oh = outputShape[1]; + int64_t ow = outputShape[2]; + int64_t oc = outputShape[3]; + int64_t fh = filterShape[1]; + int64_t fw = filterShape[2]; + int64_t ic = filterShape[3]; + + Location loc = convOp.getLoc(); + + // Reshape output and filter to the LHS and result of a "row-wise" matrix + // multiplication. + SmallVector filterReassocIndices = {{0}, {1, 2, 3}}; + auto reshapedFilterType = + RankedTensorType::get({oc, fh * fw * ic}, filterType.getElementType()); + Value reshapedFilter = rewriter.create( + loc, reshapedFilterType, filter, filterReassocIndices); + + SmallVector outputReassocIndices = {{0}, {1, 2}, {3}}; + RankedTensorType reshapedOutputType = + RankedTensorType::get({n, oh * ow, oc}, outputType.getElementType()); + Value reshapedOutput = rewriter.create( + loc, reshapedOutputType, output, outputReassocIndices); + + SmallVector colTensorShape = {n, oh * ow, fh * fw * ic}; + Value colTensor = rewriter.create( + loc, colTensorShape, inputType.getElementType()); + + // Convert the input to a (BMK) column tensor. 
+ auto nloops = colTensorShape.size(); + + auto parallel = utils::IteratorType::parallel; + auto reduction = utils::IteratorType::reduction; + SmallVector img2colIterators(nloops, parallel); + + SmallVector img2colIndexingMaps = { + AffineMap::getMultiDimIdentityMap(nloops, context)}; + + auto img2ColTensor = rewriter.create( + loc, colTensor.getType(), + /*inputs=*/ValueRange{}, /*outputs=*/colTensor, img2colIndexingMaps, + img2colIterators, + [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { + // Get the iterators named based on the matmul (batch, m, k). + Value bIndex = nestedBuilder.create(loc, 0); + Value mIndex = nestedBuilder.create(loc, 1); + Value kIndex = nestedBuilder.create(loc, 2); + + // Recover the original iteration indices from the problem/input sizes. + SmallVector mIndices = unrollIndex( + nestedBuilder, nestedLoc, mIndex, ArrayRef{oh, ow}); + auto ohIndex = mIndices[0]; + auto owIndex = mIndices[1]; + + SmallVector kIndices = unrollIndex( + nestedBuilder, nestedLoc, kIndex, ArrayRef{fh, fw, ic}); + auto fhIndex = kIndices[0]; + auto fwIndex = kIndices[1]; + auto icIndex = kIndices[2]; + + // Extract the input element corresponding to the expanded indices. + Value hIndex = + getConvolvedIndex(nestedBuilder, nestedLoc, ohIndex, fhIndex, + convOp.getStrides().getValues()[0]); + Value wIndex = + getConvolvedIndex(nestedBuilder, nestedLoc, owIndex, fwIndex, + convOp.getStrides().getValues()[1]); + + // im2col[n, oh*ow, fh*fw*ic] = input[n, sh*oh + fh, sw*ow + fw, ic] + SmallVector extractionIndices{bIndex, hIndex, wIndex, icIndex}; + Value inputVal = nestedBuilder.create( + loc, input, extractionIndices); + nestedBuilder.create(nestedLoc, inputVal); + }); + + // Because we didn't transpose the filters we don't actually have a batched + // matrix multiply. Instead, we have an operation consisting of "row-wise" dot + // products. 
+ AffineExpr bDim, mDim, nDim, kDim; + bindDims(context, bDim, mDim, nDim, kDim); + auto lhsMap = AffineMap::get(4, 0, {bDim, mDim, kDim}, context); + auto rhsMap = AffineMap::get(4, 0, {nDim, kDim}, context); + auto resultMap = AffineMap::get(4, 0, {bDim, mDim, nDim}, context); + SmallVector genericIterators = {parallel, parallel, + parallel, reduction}; + + auto genericOp = rewriter.create( + loc, reshapedOutputType, + /*inputs=*/ValueRange{img2ColTensor.getResult(0), reshapedFilter}, + /*outputs=*/ValueRange{reshapedOutput}, + ArrayRef{lhsMap, rhsMap, resultMap}, genericIterators, + [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { + Value mul = + createMul(loc, args[0], args[1], args[2].getType(), nestedBuilder); + Value add = createAdd(loc, mul, args[2], nestedBuilder); + nestedBuilder.create(nestedLoc, add); + }); + Value result = genericOp.getResults().front(); + + auto reshapedResult = rewriter.create( + loc, outputType, result, outputReassocIndices); + + rewriter.replaceOp(convOp, ArrayRef{reshapedResult}); + + return std::make_pair(img2ColTensor.getOperation(), + reshapedResult.getOperation()); +} + namespace { class ConvertConv2DNhwcHwcf final @@ -534,12 +669,25 @@ class ConvertConv2DNchwFchw final return success(); } }; + +class ConvertConv2DNhwcFhwc final + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(linalg::Conv2DNhwcFhwcOp convOp, + PatternRewriter &rewriter) const override { + if (failed(rewriteInIm2Col(rewriter, convOp))) + return failure(); + return success(); + } +}; } // end anonymous namespace void populateConvertConv2DToImg2ColPatterns(RewritePatternSet &patterns) { MLIRContext *context = patterns.getContext(); patterns.insert(context); + ConvertConv2DNchwFchw, ConvertConv2DNhwcFhwc>(context); } } // end namespace linalg } // end namespace mlir diff --git a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir 
b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir index 657cf83f25460..b2470ed7b7480 100644 --- a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir +++ b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir @@ -279,6 +279,76 @@ transform.sequence failures(propagate) { // ----- +// CHECK: IR printer: tensor_producer +// CHECK-NEXT: %[[COL_TENSOR:.+]] = linalg.generic +// CHECK-SAME: affine_map<(d0, d1, d2) -> (d0, d1, d2)>] +// CHECK: ^bb0(%[[OUT_DATA:.+]]: f32) + +// Collapsed indices. +// CHECK: %[[BINDEX:.+]] = linalg.index 0 : index +// CHECK: %[[MINDEX:.+]] = linalg.index 1 : index +// CHECK: %[[KINDEX:.+]] = linalg.index 2 : index + +// Compute input channel/convolved indices. +// CHECK: %[[ICINDEX:.+]] = affine.apply affine_map<(d0) -> (d0 mod 4)>(%[[KINDEX]]) +// CHECK: %[[CONVH:.+]] = affine.apply affine_map<(d0, d1) -> (d0 floordiv 14 + d1 floordiv 12)>(%[[MINDEX]], %[[KINDEX]]) +// CHECK: %[[CONVW:.+]] = affine.apply affine_map<(d0, d1) -> (d0 mod 14 + (d1 mod 12) floordiv 4)>(%[[MINDEX]], %[[KINDEX]]) + +// Extract from the input tensor. 
+// CHECK: %[[EXTRACTED_INPUT:.+]] = tensor.extract +// CHECK-SAME: %{{.+}}{{\[}}%[[BINDEX]], %[[CONVH]], %[[CONVW]], %[[ICINDEX]]] : tensor<1x16x16x4xf32> +// CHECK: linalg.yield %[[EXTRACTED_INPUT]] : f32 + +// CHECK: IR printer: transformed +// CHECK: tensor.expand_shape %{{[^ ]*}} {{\[}}[0], [1, 2], [3]] : tensor<1x196x16xf32> into tensor<1x14x14x16xf32> + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d2, d3)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> +// CHECK: @conv_2d_nhwc_fhwc +// CHECK-SAME: %[[INPUT:.+]]: tensor<1x16x16x4xf32> +// CHECK-SAME: %[[FILTER:.+]]: tensor<16x3x3x4xf32> +// CHECK-SAME: %[[OUTPUT:.+]]: tensor<1x14x14x16xf32> +// CHECK-DAG: %[[COLLAPSED_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0], [1, 2, 3]] : tensor<16x3x3x4xf32> into tensor<16x36xf32> +// CHECK-DAG: %[[COLLAPSED_OUT:.+]] = tensor.collapse_shape %[[OUTPUT]] {{\[}}[0], [1, 2], [3]] : tensor<1x14x14x16xf32> into tensor<1x196x16xf32> +// CHECK: %[[INIT_COL_TENSOR:.+]] = tensor.empty() : tensor<1x196x36xf32> +// CHECK: %[[COL_TENSOR:.+]] = linalg.generic +// CHECK-SAME: #[[MAP0]] +// CHECK: ^bb0(%[[OUT_DATA:.+]]: f32) +// CHECK: linalg.yield %{{.+}} : f32 +// CHECK: %[[MATMUL_RESULT:.+]] = linalg.generic +// CHECK-SAME: #[[MAP1]] +// CHECK-SAME: #[[MAP2]] +// CHECK-SAME: #[[MAP3]] +// CHECK-SAME: ins(%[[COL_TENSOR]], %[[COLLAPSED_FILTER]] : tensor<1x196x36xf32>, tensor<16x36xf32>) +// CHECK-SAME: outs(%[[COLLAPSED_OUT]] : tensor<1x196x16xf32>) +// CHECK: ^bb0(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32) +// CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32 +// CHECK: %[[ADD:.+]] = arith.addf %[[MUL]], %[[ARG2]] : f32 +// CHECK: linalg.yield %[[ADD]] : f32 +// CHECK: } -> tensor<1x196x16xf32> +// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[MATMUL_RESULT]] 
{{\[}}[0], [1, 2], [3]] : tensor<1x196x16xf32> into tensor<1x14x14x16xf32> +// CHECK: return %[[RESULT]] + +func.func @conv_2d_nhwc_fhwc(%arg0: tensor<1x16x16x4xf32>, %arg1: tensor<16x3x3x4xf32>, %arg2: tensor<1x14x14x16xf32>) -> tensor<1x14x14x16xf32> { + %0 = linalg.conv_2d_nhwc_fhwc + {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } + ins(%arg0, %arg1: tensor<1x16x16x4xf32>, tensor<16x3x3x4xf32>) + outs(%arg2: tensor<1x14x14x16xf32>) -> tensor<1x14x14x16xf32> + return %0 : tensor<1x14x14x16xf32> +} + +transform.sequence failures(propagate) { +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %img2col_tensor_producer, %transformed = transform.structured.convert_conv2d_to_img2col %0 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.print %img2col_tensor_producer {name = "tensor_producer"}: !transform.any_op + transform.print %transformed {name = "transformed"}: !transform.any_op +} + +// ----- + // Check for signed extend when the input type is smaller than the accumulator type. // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> From f54dc7b3936f1bd751db710cfc2fec1652159a3f Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 13 Oct 2023 10:22:05 +0100 Subject: [PATCH 066/720] [mlir][ODS] Omit printing default-valued attributes in oilists (#68880) This makes these match the behaviour of optional attributes (which are omitted when they are their default value of none). This allows for concise assembly formats without a custom printer. An extra print of " " is also removed, this does change any existing uses of oilists, but if the parameter before the oilist is optional, that would previously add an extra space. 
This #68694 + some fixes for the MLIR Python tests, unfortunately GitHub does not allow re-opening PRs :confused: --- flang/test/Lower/OpenMP/FIR/atomic-read.f90 | 2 +- flang/test/Lower/OpenMP/FIR/critical.f90 | 2 +- flang/test/Lower/OpenMP/critical.f90 | 2 +- .../OpenMPToLLVM/convert-to-llvmir.mlir | 4 +-- mlir/test/Dialect/OpenMP/ops.mlir | 10 +++---- .../dialects/transform_structured_ext.py | 6 ++--- .../python/dialects/transform_vector_ext.py | 8 +----- mlir/tools/mlir-tblgen/OpFormatGen.cpp | 26 +++++++++++++------ 8 files changed, 32 insertions(+), 28 deletions(-) diff --git a/flang/test/Lower/OpenMP/FIR/atomic-read.f90 b/flang/test/Lower/OpenMP/FIR/atomic-read.f90 index ff2b651953f2a..0079b347fac8d 100644 --- a/flang/test/Lower/OpenMP/FIR/atomic-read.f90 +++ b/flang/test/Lower/OpenMP/FIR/atomic-read.f90 @@ -14,7 +14,7 @@ !CHECK: %[[VAR_X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} !CHECK: %[[VAR_Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} !CHECK: omp.atomic.read %[[VAR_X]] = %[[VAR_Y]] memory_order(acquire) hint(uncontended) : !fir.ref, i32 -!CHECK: omp.atomic.read %[[VAR_A]] = %[[VAR_B]] memory_order(relaxed) hint(none) : !fir.ref>, !fir.char<1> +!CHECK: omp.atomic.read %[[VAR_A]] = %[[VAR_B]] memory_order(relaxed) : !fir.ref>, !fir.char<1> !CHECK: omp.atomic.read %[[VAR_C]] = %[[VAR_D]] memory_order(seq_cst) hint(contended) : !fir.ref>, !fir.logical<4> !CHECK: omp.atomic.read %[[VAR_E]] = %[[VAR_F]] hint(speculative) : !fir.ref>, !fir.char<1,8> !CHECK: omp.atomic.read %[[VAR_G]] = %[[VAR_H]] hint(nonspeculative) : !fir.ref, f32 diff --git a/flang/test/Lower/OpenMP/FIR/critical.f90 b/flang/test/Lower/OpenMP/FIR/critical.f90 index c6ac818fe21aa..b86729f8a98e3 100644 --- a/flang/test/Lower/OpenMP/FIR/critical.f90 +++ b/flang/test/Lower/OpenMP/FIR/critical.f90 @@ -2,7 +2,7 @@ !RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | fir-opt --fir-to-llvm-ir | FileCheck %s --check-prefix="OMPDialect" !RUN: %flang_fc1 -emit-fir -fopenmp 
%s -o - | fir-opt --fir-to-llvm-ir | tco | FileCheck %s --check-prefix="LLVMIR" -!OMPDialect: omp.critical.declare @help2 hint(none) +!OMPDialect: omp.critical.declare @help2 !OMPDialect: omp.critical.declare @help1 hint(contended) subroutine omp_critical() diff --git a/flang/test/Lower/OpenMP/critical.f90 b/flang/test/Lower/OpenMP/critical.f90 index 9fbd172df9642..5a4d2e4815df4 100644 --- a/flang/test/Lower/OpenMP/critical.f90 +++ b/flang/test/Lower/OpenMP/critical.f90 @@ -1,6 +1,6 @@ !RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK: omp.critical.declare @help2 hint(none) +!CHECK: omp.critical.declare @help2 !CHECK: omp.critical.declare @help1 hint(contended) subroutine omp_critical() diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index 1df27dd9957e5..881d738b413ef 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -90,7 +90,7 @@ func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: // CHECK-LABEL: @atomic_write // CHECK: (%[[ARG0:.*]]: !llvm.ptr) // CHECK: %[[VAL0:.*]] = llvm.mlir.constant(1 : i32) : i32 -// CHECK: omp.atomic.write %[[ARG0]] = %[[VAL0]] hint(none) memory_order(relaxed) : !llvm.ptr, i32 +// CHECK: omp.atomic.write %[[ARG0]] = %[[VAL0]] memory_order(relaxed) : !llvm.ptr, i32 func.func @atomic_write(%a: !llvm.ptr) -> () { %1 = arith.constant 1 : i32 omp.atomic.write %a = %1 hint(none) memory_order(relaxed) : !llvm.ptr, i32 @@ -474,4 +474,4 @@ llvm.func @_QPtarget_map_with_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %x: memref) { omp.atomic.read %v = %x hint(nonspeculative, contended) : memref, i32 // CHECK: omp.atomic.read %[[v]] = %[[x]] memory_order(seq_cst) hint(contended, speculative) : memref, i32 omp.atomic.read %v = %x hint(speculative, contended) memory_order(seq_cst) : memref, i32 - // CHECK: omp.atomic.read %[[v]] = %[[x]] 
memory_order(seq_cst) hint(none) : memref, i32 + // CHECK: omp.atomic.read %[[v]] = %[[x]] memory_order(seq_cst) : memref, i32 omp.atomic.read %v = %x hint(none) memory_order(seq_cst) : memref, i32 return } @@ -927,7 +927,7 @@ func.func @omp_atomic_write(%addr : memref, %val : i32) { omp.atomic.write %addr = %val memory_order(relaxed) : memref, i32 // CHECK: omp.atomic.write %[[ADDR]] = %[[VAL]] hint(uncontended, speculative) : memref, i32 omp.atomic.write %addr = %val hint(speculative, uncontended) : memref, i32 - // CHECK: omp.atomic.write %[[ADDR]] = %[[VAL]] hint(none) : memref, i32 + // CHECK: omp.atomic.write %[[ADDR]] = %[[VAL]] : memref, i32 omp.atomic.write %addr = %val hint(none) : memref, i32 return } @@ -1004,7 +1004,7 @@ func.func @omp_atomic_update(%x : memref, %expr : i32, %xBool : memref, omp.yield(%const:i32) } - // CHECK: omp.atomic.update hint(none) %[[X]] : memref + // CHECK: omp.atomic.update %[[X]] : memref // CHECK-NEXT: (%[[XVAL:.*]]: i32): // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.add %[[XVAL]], %[[EXPR]] : i32 // CHECK-NEXT: omp.yield(%[[NEWVAL]] : i32) @@ -1181,7 +1181,7 @@ func.func @omp_atomic_capture(%v: memref, %x: memref, %expr: i32) { omp.atomic.write %x = %expr : memref, i32 } - // CHECK: omp.atomic.capture hint(none) { + // CHECK: omp.atomic.capture { // CHECK-NEXT: omp.atomic.update %[[x]] : memref // CHECK-NEXT: (%[[xval:.*]]: i32): // CHECK-NEXT: %[[newval:.*]] = llvm.add %[[xval]], %[[expr]] : i32 diff --git a/mlir/test/python/dialects/transform_structured_ext.py b/mlir/test/python/dialects/transform_structured_ext.py index 0f89d4137455a..c9b7802e1cc45 100644 --- a/mlir/test/python/dialects/transform_structured_ext.py +++ b/mlir/test/python/dialects/transform_structured_ext.py @@ -439,7 +439,7 @@ def testTileToForallCompact(target): structured.TileUsingForallOp(matmul, num_threads=[2, 3, 4]) # CHECK-LABEL: TEST: testTileToForallCompact # CHECK: = transform.structured.tile_using_forall - # CHECK-SAME: num_threads [2, 3, 4] 
tile_sizes [] + # CHECK-SAME: num_threads [2, 3, 4] # CHECK-SAME: (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op) @@ -454,7 +454,7 @@ def testTileToForallLoopsAndTileOpTypes(target): ) # CHECK-LABEL: TEST: testTileToForallLoopsAndTileOpTypes # CHECK: = transform.structured.tile_using_forall - # CHECK-SAME: num_threads [2, 3, 4] tile_sizes [] + # CHECK-SAME: num_threads [2, 3, 4] # CHECK-SAME: (!transform.any_op) -> (!transform.op<"scf.forall">, !transform.op<"linalg.matmul">) @@ -464,7 +464,7 @@ def testTileToForallTileSizes(target): structured.TileUsingForallOp(target, tile_sizes=[2, 3, 4]) # CHECK-LABEL: TEST: testTileToForallTileSizes # CHECK: = transform.structured.tile_using_forall - # CHECK-SAME: num_threads [] tile_sizes [2, 3, 4] + # CHECK-SAME: tile_sizes [2, 3, 4] @run diff --git a/mlir/test/python/dialects/transform_vector_ext.py b/mlir/test/python/dialects/transform_vector_ext.py index 1a0a9e1d6ecbd..a51f2154d1f7d 100644 --- a/mlir/test/python/dialects/transform_vector_ext.py +++ b/mlir/test/python/dialects/transform_vector_ext.py @@ -94,30 +94,24 @@ def enum_configurable_patterns(): ) # CHECK: transform.apply_patterns.vector.lower_transpose - # CHECK-SAME: lowering_strategy = eltwise - # CHECK-SAME: avx2_lowering_strategy = false vector.ApplyLowerTransposePatternsOp() # CHECK: transform.apply_patterns.vector.lower_transpose - # CHECK-SAME: lowering_strategy = eltwise - # CHECK-SAME: avx2_lowering_strategy = false + # This is the default strategy, not printed. 
vector.ApplyLowerTransposePatternsOp( lowering_strategy=vector.VectorTransposeLowering.EltWise ) # CHECK: transform.apply_patterns.vector.lower_transpose # CHECK-SAME: lowering_strategy = flat_transpose - # CHECK-SAME: avx2_lowering_strategy = false vector.ApplyLowerTransposePatternsOp( lowering_strategy=vector.VectorTransposeLowering.Flat ) # CHECK: transform.apply_patterns.vector.lower_transpose # CHECK-SAME: lowering_strategy = shuffle_1d - # CHECK-SAME: avx2_lowering_strategy = false vector.ApplyLowerTransposePatternsOp( lowering_strategy=vector.VectorTransposeLowering.Shuffle1D ) # CHECK: transform.apply_patterns.vector.lower_transpose # CHECK-SAME: lowering_strategy = shuffle_16x16 - # CHECK-SAME: avx2_lowering_strategy = false vector.ApplyLowerTransposePatternsOp( lowering_strategy=vector.VectorTransposeLowering.Shuffle16x16 ) diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index bdb97866a47fc..18ca34379a71a 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -2009,6 +2009,16 @@ static void genEnumAttrPrinter(const NamedAttribute *var, const Operator &op, " }\n"; } +/// Generate a check that a DefaultValuedAttr has a value that is non-default. +static void genNonDefaultValueCheck(MethodBody &body, const Operator &op, + AttributeVariable &attrElement) { + FmtContext fctx; + Attribute attr = attrElement.getVar()->attr; + fctx.withBuilder("::mlir::OpBuilder((*this)->getContext())"); + body << " && " << op.getGetterName(attrElement.getVar()->name) << "Attr() != " + << tgfmt(attr.getConstBuilderTemplate(), &fctx, attr.getDefaultValue()); +} + /// Generate the check for the anchor of an optional group. 
static void genOptionalGroupPrinterAnchor(FormatElement *anchor, const Operator &op, @@ -2042,12 +2052,7 @@ static void genOptionalGroupPrinterAnchor(FormatElement *anchor, if (attr.hasDefaultValue()) { // Consider a default-valued attribute as present if it's not the // default value. - FmtContext fctx; - fctx.withBuilder("::mlir::OpBuilder((*this)->getContext())"); - body << " && " << op.getGetterName(element->getVar()->name) - << "Attr() != " - << tgfmt(attr.getConstBuilderTemplate(), &fctx, - attr.getDefaultValue()); + genNonDefaultValueCheck(body, op, *element); return; } llvm_unreachable("attribute must be optional or default-valued"); @@ -2158,7 +2163,6 @@ void OperationFormat::genElementPrinter(FormatElement *element, // Emit the OIList if (auto *oilist = dyn_cast(element)) { - genLiteralPrinter(" ", body, shouldEmitSpace, lastWasPunctuation); for (auto clause : oilist->getClauses()) { LiteralElement *lelement = std::get<0>(clause); ArrayRef pelement = std::get<1>(clause); @@ -2170,8 +2174,14 @@ void OperationFormat::genElementPrinter(FormatElement *element, for (VariableElement *var : vars) { TypeSwitch(var) .Case([&](AttributeVariable *attrEle) { - body << " || " << op.getGetterName(attrEle->getVar()->name) + body << " || (" << op.getGetterName(attrEle->getVar()->name) << "Attr()"; + Attribute attr = attrEle->getVar()->attr; + if (attr.hasDefaultValue()) { + // Don't print default-valued attributes. + genNonDefaultValueCheck(body, op, *attrEle); + } + body << ")"; }) .Case([&](OperandVariable *ele) { if (ele->getVar()->isVariadic()) { From 2e955c0504d4cc529e33e0342b60183170b5c815 Mon Sep 17 00:00:00 2001 From: vabridgers <58314289+vabridgers@users.noreply.github.com> Date: Fri, 13 Oct 2023 05:03:38 -0500 Subject: [PATCH 067/720] Revert "[Sema] Add check for bitfield assignments to integral types" (#68963) This reverts commit 47e36266e93de9c34ba3028951a58124864bb2b4. This change broke some arm8/arm7 build bots because int and void * have the same size. 
Co-authored-by: einvbri --- clang/docs/ReleaseNotes.rst | 3 -- clang/include/clang/Basic/DiagnosticGroups.td | 2 -- .../clang/Basic/DiagnosticSemaKinds.td | 3 -- clang/lib/Sema/SemaChecking.cpp | 13 +------ clang/test/SemaCXX/bitfield-width.c | 34 ------------------- 5 files changed, 1 insertion(+), 54 deletions(-) delete mode 100644 clang/test/SemaCXX/bitfield-width.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 31969201a1cac..2d918967e7f0b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -185,9 +185,6 @@ New Compiler Flags the preprocessed text to the output. This can greatly reduce the size of the preprocessed output, which can be helpful when trying to reduce a test case. -* ``-Wbitfield-conversion`` was added to detect assignments of integral - types to a bitfield that may change the value. - Deprecated Compiler Flags ------------------------- diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 674eb9f4ef2e7..0b09c00219184 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -53,7 +53,6 @@ def SingleBitBitFieldConstantConversion : def BitFieldConstantConversion : DiagGroup<"bitfield-constant-conversion", [SingleBitBitFieldConstantConversion]>; def BitFieldEnumConversion : DiagGroup<"bitfield-enum-conversion">; -def BitFieldConversion : DiagGroup<"bitfield-conversion">; def BitFieldWidth : DiagGroup<"bitfield-width">; def CompoundTokenSplitByMacro : DiagGroup<"compound-token-split-by-macro">; def CompoundTokenSplitBySpace : DiagGroup<"compound-token-split-by-space">; @@ -934,7 +933,6 @@ def Conversion : DiagGroup<"conversion", ConstantConversion, EnumConversion, BitFieldEnumConversion, - BitFieldConversion, FloatConversion, Shorten64To32, IntConversion, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 
ab7fe881976aa..c1a6e3831127e 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6171,9 +6171,6 @@ def warn_signed_bitfield_enum_conversion : Warning< "signed bit-field %0 needs an extra bit to represent the largest positive " "enumerators of %1">, InGroup, DefaultIgnore; -def warn_bitfield_too_small_for_integral_type : Warning< - "conversion from %2 (%3 bits) to bit-field %0 (%1 bits) may change value">, - InGroup, DefaultIgnore; def note_change_bitfield_sign : Note< "consider making the bitfield type %select{unsigned|signed}0">; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index cd61459cfbb13..35b36db2049db 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -14298,18 +14298,6 @@ static bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init, S.Diag(WidthExpr->getExprLoc(), diag::note_widen_bitfield) << BitsNeeded << ED << WidthExpr->getSourceRange(); } - } else if (OriginalInit->getType()->isIntegralType(S.Context)) { - IntRange LikelySourceRange = - GetExprRange(S.Context, Init, S.isConstantEvaluatedContext(), - /*Approximate=*/true); - - if (LikelySourceRange.Width > FieldWidth) { - Expr *WidthExpr = Bitfield->getBitWidth(); - S.Diag(InitLoc, diag::warn_bitfield_too_small_for_integral_type) - << Bitfield << FieldWidth << OriginalInit->getType() - << LikelySourceRange.Width; - S.Diag(WidthExpr->getExprLoc(), diag::note_declared_at); - } } return false; @@ -15207,6 +15195,7 @@ static void CheckImplicitConversion(Sema &S, Expr *E, QualType T, if (LikelySourceRange.Width > TargetRange.Width) { // If the source is a constant, use a default-on diagnostic. + // TODO: this should happen for bitfield stores, too. 
Expr::EvalResult Result; if (E->EvaluateAsInt(Result, S.Context, Expr::SE_AllowSideEffects, S.isConstantEvaluatedContext())) { diff --git a/clang/test/SemaCXX/bitfield-width.c b/clang/test/SemaCXX/bitfield-width.c deleted file mode 100644 index 8219054b959e5..0000000000000 --- a/clang/test/SemaCXX/bitfield-width.c +++ /dev/null @@ -1,34 +0,0 @@ -// RUN: %clang_cc1 -Wconversion -fsyntax-only -verify %s -// RUN: %clang_cc1 -Wbitfield-conversion -fsyntax-only -verify %s - -typedef struct _xx { - int bf:9; // expected-note 4{{declared here}} - } xx, *pxx; - - xx vxx; - - void foo1(int x) { - vxx.bf = x; // expected-warning{{conversion from 'int' (32 bits) to bit-field 'bf' (9 bits) may change value}} - } - void foo2(short x) { - vxx.bf = x; // expected-warning{{conversion from 'short' (16 bits) to bit-field 'bf' (9 bits) may change value}} - } - void foo3(char x) { - vxx.bf = x; // no warning expected - } - void foo5(void * x) { - vxx.bf = (int)x; // expected-warning{{cast to smaller integer type 'int' from 'void *'}} - // expected-warning@-1{{conversion from 'int' (32 bits) to bit-field 'bf' (9 bits) may change value}} - } - void foo6(short x) { - vxx.bf = 0xff & x; // no warning expected - } - void foo7(short x) { - vxx.bf = 0x1ff & x; // no warning expected - } - void foo8(short x) { - vxx.bf = 0x3ff & x; // expected-warning{{conversion from 'int' (10 bits) to bit-field 'bf' (9 bits) may change value}} - } - int fee(void) { - return 0; - } From c6f065d9d99738f1aca1a29f1f9f8900d2d38cbb Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Fri, 13 Oct 2023 10:34:13 +0000 Subject: [PATCH 068/720] [BOLT][RISCV] Recognize mapping syms with encoded ISA (#68964) RISC-V supports mapping syms for code that encode the exact ISA for which the code is valid. They have the form `$x` where `` is the textual encoding of an ISA specification. 
BOLT currently doesn't recognize these mapping symbols causing many binaries compiled with newer versions of GCC (which emits them) to not be properly processed. This patch makes sure BOLT recognizes them as code markers. Note that LLVM does not emit these kinds of mapping symbols yet so the test is based on a binary produced by GCC. --- bolt/lib/Core/BinaryContext.cpp | 4 ++ bolt/test/RISCV/Inputs/mapping-syms-isa.yaml | 47 ++++++++++++++++++++ bolt/test/RISCV/mapping-syms-isa.test | 18 ++++++++ 3 files changed, 69 insertions(+) create mode 100644 bolt/test/RISCV/Inputs/mapping-syms-isa.yaml create mode 100644 bolt/test/RISCV/mapping-syms-isa.test diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index b5514228d7a25..f19460f8c1f52 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1803,6 +1803,10 @@ MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { if (*NameOrError == "$x" || NameOrError->startswith("$x.")) return MarkerSymType::CODE; + // $x + if (isRISCV() && NameOrError->startswith("$x")) + return MarkerSymType::CODE; + if (*NameOrError == "$d" || NameOrError->startswith("$d.")) return MarkerSymType::DATA; diff --git a/bolt/test/RISCV/Inputs/mapping-syms-isa.yaml b/bolt/test/RISCV/Inputs/mapping-syms-isa.yaml new file mode 100644 index 0000000000000..a47ecfde5dead --- /dev/null +++ b/bolt/test/RISCV/Inputs/mapping-syms-isa.yaml @@ -0,0 +1,47 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_RISCV + Flags: [ EF_RISCV_RVC, EF_RISCV_FLOAT_ABI_DOUBLE ] + Entry: 0x100B0 +ProgramHeaders: + - Type: 0x70000003 + Flags: [ PF_R ] + FirstSec: .riscv.attributes + LastSec: .riscv.attributes + Offset: 0xB8 + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + FirstSec: .text + LastSec: .text + VAddr: 0x10000 + Align: 0x1000 + Offset: 0x0 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x100B0 + 
AddressAlign: 0x2 + Content: '0100000000008280' + - Name: .riscv.attributes + Type: SHT_RISCV_ATTRIBUTES + AddressAlign: 0x1 + Content: 4144000000726973637600013A0000000572763634693270315F6D3270305F613270315F663270325F643270325F633270305F7A696373723270305F7A6D6D756C31703000 +Symbols: + - Name: '_start' + Section: .text + Binding: STB_GLOBAL + Value: 0x100B0 + - Name: '$xrv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zmmul1p0' + Section: .text + Value: 0x100B0 + - Name: '$d' + Section: .text + Value: 0x100B2 + - Name: '$x' + Section: .text + Value: 0x100B6 +... diff --git a/bolt/test/RISCV/mapping-syms-isa.test b/bolt/test/RISCV/mapping-syms-isa.test new file mode 100644 index 0000000000000..22678af12f913 --- /dev/null +++ b/bolt/test/RISCV/mapping-syms-isa.test @@ -0,0 +1,18 @@ +# Test that BOLT handles mapping syms that include ISA strings: $x + +RUN: yaml2obj -o %t %p/Inputs/mapping-syms-isa.yaml +RUN: llvm-bolt --print-cfg --print-only=_start -o %t.bolt %t 2>&1 | FileCheck %s +RUN: llvm-objdump -d %t.bolt | FileCheck --check-prefix=CHECK-OBJDUMP %s + +CHECK-NOT: BOLT-WARNING + +# Check that .word is not disassembled by BOLT +CHECK: 00000000: nop +CHECK: 00000002: ret + +# Check .word is still present in output +CHECK-OBJDUMP: <_start>: +CHECK-OBJDUMP-NEXT: nop +CHECK-OBJDUMP-NEXT: unimp +CHECK-OBJDUMP-NEXT: unimp +CHECK-OBJDUMP-NEXT: ret From 7ef1754301a88ea0cbcffae53c2027abad3cc357 Mon Sep 17 00:00:00 2001 From: Rik Huijzer Date: Fri, 13 Oct 2023 12:35:04 +0200 Subject: [PATCH 069/720] [mlir][arith] Fix canon pattern for large ints in chained arith (#68900) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic for chained basic arithmetic operations in the `arith` dialect was using `getInt()` on `IntegerAttr`. This is a problem for very large integers. 
Specifically, in https://github.com/llvm/llvm-project/issues/64774 the following assertion failed: ``` Assertion failed: (getSignificantBits() <= 64 && "Too many bits for int64_t"), function getSExtValue, file APInt.h, line 1510. ``` According to a comment on `getInt()`, calls to `getInt()` should be replaced by `getValue()`: https://github.com/llvm/llvm-project/blob/ab6a66dbec61654d0962f6abf6d6c5b776937584/mlir/include/mlir/IR/BuiltinAttributes.td#L707-L708 This patch fixes https://github.com/llvm/llvm-project/issues/64774 by doing such a replacement. --------- Co-authored-by: Markus Böck --- mlir/lib/Dialect/Arith/IR/ArithOps.cpp | 16 ++++++++-------- mlir/test/Dialect/Arith/canonicalize.mlir | 12 ++++++++++++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index ae8a6ef350ce1..3892e8fa0a32f 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -39,26 +39,26 @@ using namespace mlir::arith; static IntegerAttr applyToIntegerAttrs(PatternRewriter &builder, Value res, Attribute lhs, Attribute rhs, - function_ref binFn) { - return builder.getIntegerAttr(res.getType(), - binFn(llvm::cast(lhs).getInt(), - llvm::cast(rhs).getInt())); + function_ref binFn) { + APInt lhsVal = llvm::cast(lhs).getValue(); + APInt rhsVal = llvm::cast(rhs).getValue(); + APInt value = binFn(lhsVal, rhsVal); + return IntegerAttr::get(res.getType(), value); } static IntegerAttr addIntegerAttrs(PatternRewriter &builder, Value res, Attribute lhs, Attribute rhs) { - return applyToIntegerAttrs(builder, res, lhs, rhs, std::plus()); + return applyToIntegerAttrs(builder, res, lhs, rhs, std::plus()); } static IntegerAttr subIntegerAttrs(PatternRewriter &builder, Value res, Attribute lhs, Attribute rhs) { - return applyToIntegerAttrs(builder, res, lhs, rhs, std::minus()); + return applyToIntegerAttrs(builder, res, lhs, rhs, std::minus()); } static IntegerAttr 
mulIntegerAttrs(PatternRewriter &builder, Value res, Attribute lhs, Attribute rhs) { - return applyToIntegerAttrs(builder, res, lhs, rhs, - std::multiplies()); + return applyToIntegerAttrs(builder, res, lhs, rhs, std::multiplies()); } /// Invert an integer comparison predicate. diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index f697f3d01458e..5e4476a21df04 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -909,6 +909,18 @@ func.func @tripleMulIMulII32(%arg0: i32) -> i32 { return %mul2 : i32 } +// CHECK-LABEL: @tripleMulLargeInt +// CHECK: %[[cres:.+]] = arith.constant 3618502788666131213697322783095070105623107215331596699973092056135872020482 : i256 +// CHECK: %[[addi:.+]] = arith.addi %arg0, %[[cres]] : i256 +// CHECK: return %[[addi]] +func.func @tripleMulLargeInt(%arg0: i256) -> i256 { + %0 = arith.constant 3618502788666131213697322783095070105623107215331596699973092056135872020481 : i256 + %1 = arith.constant 1 : i256 + %2 = arith.addi %arg0, %0 : i256 + %3 = arith.addi %2, %1 : i256 + return %3 : i256 +} + // CHECK-LABEL: @addiMuliToSubiRhsI32 // CHECK-SAME: (%[[ARG0:.+]]: i32, %[[ARG1:.+]]: i32) // CHECK: %[[SUB:.+]] = arith.subi %[[ARG0]], %[[ARG1]] : i32 From 7025ff6fa3dfe2ce8d3d7fcb0ec9de9a357d2c6f Mon Sep 17 00:00:00 2001 From: Arjun P Date: Fri, 13 Oct 2023 11:24:09 +0100 Subject: [PATCH 070/720] [MLIR][Presburger] clang-format and clang-tidy Fix formatting issues mostly introduced in recent commits. (This was possibly missed due to GitHub not having formatting checks at the time, but it's unclear.) 
--- .../include/mlir/Analysis/Presburger/Matrix.h | 28 ++-- mlir/lib/Analysis/Presburger/Matrix.cpp | 130 ++++++++++++------ .../Presburger/PresburgerRelation.cpp | 18 +-- mlir/lib/Analysis/Presburger/Utils.cpp | 4 +- 4 files changed, 108 insertions(+), 72 deletions(-) diff --git a/mlir/include/mlir/Analysis/Presburger/Matrix.h b/mlir/include/mlir/Analysis/Presburger/Matrix.h index bed3a5f75e396..29f8b7d2b304e 100644 --- a/mlir/include/mlir/Analysis/Presburger/Matrix.h +++ b/mlir/include/mlir/Analysis/Presburger/Matrix.h @@ -15,8 +15,8 @@ #ifndef MLIR_ANALYSIS_PRESBURGER_MATRIX_H #define MLIR_ANALYSIS_PRESBURGER_MATRIX_H -#include "mlir/Support/LLVM.h" #include "mlir/Analysis/Presburger/Fraction.h" +#include "mlir/Support/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/raw_ostream.h" @@ -36,9 +36,11 @@ namespace presburger { /// This class only works for the types MPInt and Fraction, since the method /// implementations are in the Matrix.cpp file. Only these two types have /// been explicitly instantiated there. -template +template class Matrix { -static_assert(std::is_same_v || std::is_same_v, "T must be MPInt or Fraction."); + static_assert(std::is_same_v || std::is_same_v, + "T must be MPInt or Fraction."); + public: Matrix() = delete; @@ -69,9 +71,7 @@ static_assert(std::is_same_v || std::is_same_v, "T must be T &operator()(unsigned row, unsigned column) { return at(row, column); } - T operator()(unsigned row, unsigned column) const { - return at(row, column); - } + T operator()(unsigned row, unsigned column) const { return at(row, column); } /// Swap the given columns. void swapColumns(unsigned column, unsigned otherColumn); @@ -204,21 +204,20 @@ static_assert(std::is_same_v || std::is_same_v, "T must be // An inherited class for integer matrices, with no new data attributes. // This is only used for the matrix-related methods which apply only // to integers (hermite normal form computation and row normalisation). 
-class IntMatrix : public Matrix -{ +class IntMatrix : public Matrix { public: IntMatrix(unsigned rows, unsigned columns, unsigned reservedRows = 0, - unsigned reservedColumns = 0) : - Matrix(rows, columns, reservedRows, reservedColumns) {}; + unsigned reservedColumns = 0) + : Matrix(rows, columns, reservedRows, reservedColumns){}; - IntMatrix(Matrix m) : - Matrix(m.getNumRows(), m.getNumColumns(), m.getNumReservedRows(), m.getNumReservedColumns()) - { + IntMatrix(Matrix m) + : Matrix(m.getNumRows(), m.getNumColumns(), m.getNumReservedRows(), + m.getNumReservedColumns()) { for (unsigned i = 0; i < m.getNumRows(); i++) for (unsigned j = 0; j < m.getNumColumns(); j++) at(i, j) = m(i, j); }; - + /// Return the identity matrix of the specified dimension. static IntMatrix identity(unsigned dimension); @@ -240,7 +239,6 @@ class IntMatrix : public Matrix /// Divide the columns of the specified row by their GCD. /// Returns the GCD of the columns of the specified row. MPInt normalizeRow(unsigned row); - }; } // namespace presburger diff --git a/mlir/lib/Analysis/Presburger/Matrix.cpp b/mlir/lib/Analysis/Presburger/Matrix.cpp index f0bcb09fb28f7..ce6253e0bda93 100644 --- a/mlir/lib/Analysis/Presburger/Matrix.cpp +++ b/mlir/lib/Analysis/Presburger/Matrix.cpp @@ -14,35 +14,41 @@ using namespace mlir; using namespace presburger; -template Matrix::Matrix(unsigned rows, unsigned columns, unsigned reservedRows, - unsigned reservedColumns) +template +Matrix::Matrix(unsigned rows, unsigned columns, unsigned reservedRows, + unsigned reservedColumns) : nRows(rows), nColumns(columns), nReservedColumns(std::max(nColumns, reservedColumns)), data(nRows * nReservedColumns) { data.reserve(std::max(nRows, reservedRows) * nReservedColumns); } -template Matrix Matrix::identity(unsigned dimension) { +template +Matrix Matrix::identity(unsigned dimension) { Matrix matrix(dimension, dimension); for (unsigned i = 0; i < dimension; ++i) matrix(i, i) = 1; return matrix; } -template unsigned 
Matrix::getNumReservedRows() const { +template +unsigned Matrix::getNumReservedRows() const { return data.capacity() / nReservedColumns; } -template void Matrix::reserveRows(unsigned rows) { +template +void Matrix::reserveRows(unsigned rows) { data.reserve(rows * nReservedColumns); } -template unsigned Matrix::appendExtraRow() { +template +unsigned Matrix::appendExtraRow() { resizeVertically(nRows + 1); return nRows - 1; } -template unsigned Matrix::appendExtraRow(ArrayRef elems) { +template +unsigned Matrix::appendExtraRow(ArrayRef elems) { assert(elems.size() == nColumns && "elems must match row length!"); unsigned row = appendExtraRow(); for (unsigned col = 0; col < nColumns; ++col) @@ -50,24 +56,28 @@ template unsigned Matrix::appendExtraRow(ArrayRef elems) { return row; } -template void Matrix::resizeHorizontally(unsigned newNColumns) { +template +void Matrix::resizeHorizontally(unsigned newNColumns) { if (newNColumns < nColumns) removeColumns(newNColumns, nColumns - newNColumns); if (newNColumns > nColumns) insertColumns(nColumns, newNColumns - nColumns); } -template void Matrix::resize(unsigned newNRows, unsigned newNColumns) { +template +void Matrix::resize(unsigned newNRows, unsigned newNColumns) { resizeHorizontally(newNColumns); resizeVertically(newNRows); } -template void Matrix::resizeVertically(unsigned newNRows) { +template +void Matrix::resizeVertically(unsigned newNRows) { nRows = newNRows; data.resize(nRows * nReservedColumns); } -template void Matrix::swapRows(unsigned row, unsigned otherRow) { +template +void Matrix::swapRows(unsigned row, unsigned otherRow) { assert((row < getNumRows() && otherRow < getNumRows()) && "Given row out of bounds"); if (row == otherRow) @@ -76,7 +86,8 @@ template void Matrix::swapRows(unsigned row, unsigned otherRow) std::swap(at(row, col), at(otherRow, col)); } -template void Matrix::swapColumns(unsigned column, unsigned otherColumn) { +template +void Matrix::swapColumns(unsigned column, unsigned otherColumn) { 
assert((column < getNumColumns() && otherColumn < getNumColumns()) && "Given column out of bounds"); if (column == otherColumn) @@ -85,23 +96,30 @@ template void Matrix::swapColumns(unsigned column, unsigned othe std::swap(at(row, column), at(row, otherColumn)); } -template MutableArrayRef Matrix::getRow(unsigned row) { +template +MutableArrayRef Matrix::getRow(unsigned row) { return {&data[row * nReservedColumns], nColumns}; } -template ArrayRef Matrix::getRow(unsigned row) const { +template +ArrayRef Matrix::getRow(unsigned row) const { return {&data[row * nReservedColumns], nColumns}; } -template void Matrix::setRow(unsigned row, ArrayRef elems) { +template +void Matrix::setRow(unsigned row, ArrayRef elems) { assert(elems.size() == getNumColumns() && "elems size must match row length!"); for (unsigned i = 0, e = getNumColumns(); i < e; ++i) at(row, i) = elems[i]; } -template void Matrix::insertColumn(unsigned pos) { insertColumns(pos, 1); } -template void Matrix::insertColumns(unsigned pos, unsigned count) { +template +void Matrix::insertColumn(unsigned pos) { + insertColumns(pos, 1); +} +template +void Matrix::insertColumns(unsigned pos, unsigned count) { if (count == 0) return; assert(pos <= nColumns); @@ -142,8 +160,12 @@ template void Matrix::insertColumns(unsigned pos, unsigned count } } -template void Matrix::removeColumn(unsigned pos) { removeColumns(pos, 1); } -template void Matrix::removeColumns(unsigned pos, unsigned count) { +template +void Matrix::removeColumn(unsigned pos) { + removeColumns(pos, 1); +} +template +void Matrix::removeColumns(unsigned pos, unsigned count) { if (count == 0) return; assert(pos + count - 1 < nColumns); @@ -156,8 +178,12 @@ template void Matrix::removeColumns(unsigned pos, unsigned count nColumns -= count; } -template void Matrix::insertRow(unsigned pos) { insertRows(pos, 1); } -template void Matrix::insertRows(unsigned pos, unsigned count) { +template +void Matrix::insertRow(unsigned pos) { + insertRows(pos, 1); +} 
+template +void Matrix::insertRows(unsigned pos, unsigned count) { if (count == 0) return; @@ -170,8 +196,12 @@ template void Matrix::insertRows(unsigned pos, unsigned count) { at(r, c) = 0; } -template void Matrix::removeRow(unsigned pos) { removeRows(pos, 1); } -template void Matrix::removeRows(unsigned pos, unsigned count) { +template +void Matrix::removeRow(unsigned pos) { + removeRows(pos, 1); +} +template +void Matrix::removeRows(unsigned pos, unsigned count) { if (count == 0) return; assert(pos + count - 1 <= nRows); @@ -180,50 +210,57 @@ template void Matrix::removeRows(unsigned pos, unsigned count) { resizeVertically(nRows - count); } -template void Matrix::copyRow(unsigned sourceRow, unsigned targetRow) { +template +void Matrix::copyRow(unsigned sourceRow, unsigned targetRow) { if (sourceRow == targetRow) return; for (unsigned c = 0; c < nColumns; ++c) at(targetRow, c) = at(sourceRow, c); } -template void Matrix::fillRow(unsigned row, const T &value) { +template +void Matrix::fillRow(unsigned row, const T &value) { for (unsigned col = 0; col < nColumns; ++col) at(row, col) = value; } -template void Matrix::addToRow(unsigned sourceRow, unsigned targetRow, - const T &scale) { +template +void Matrix::addToRow(unsigned sourceRow, unsigned targetRow, + const T &scale) { addToRow(targetRow, getRow(sourceRow), scale); } -template void Matrix::addToRow(unsigned row, ArrayRef rowVec, - const T &scale) { +template +void Matrix::addToRow(unsigned row, ArrayRef rowVec, const T &scale) { if (scale == 0) return; for (unsigned col = 0; col < nColumns; ++col) at(row, col) += scale * rowVec[col]; } -template void Matrix::addToColumn(unsigned sourceColumn, unsigned targetColumn, - const T &scale) { +template +void Matrix::addToColumn(unsigned sourceColumn, unsigned targetColumn, + const T &scale) { if (scale == 0) return; for (unsigned row = 0, e = getNumRows(); row < e; ++row) at(row, targetColumn) += scale * at(row, sourceColumn); } -template void 
Matrix::negateColumn(unsigned column) { +template +void Matrix::negateColumn(unsigned column) { for (unsigned row = 0, e = getNumRows(); row < e; ++row) at(row, column) = -at(row, column); } -template void Matrix::negateRow(unsigned row) { +template +void Matrix::negateRow(unsigned row) { for (unsigned column = 0, e = getNumColumns(); column < e; ++column) at(row, column) = -at(row, column); } -template SmallVector Matrix::preMultiplyWithRow(ArrayRef rowVec) const { +template +SmallVector Matrix::preMultiplyWithRow(ArrayRef rowVec) const { assert(rowVec.size() == getNumRows() && "Invalid row vector dimension!"); SmallVector result(getNumColumns(), T(0)); @@ -233,8 +270,8 @@ template SmallVector Matrix::preMultiplyWithRow(ArrayRef SmallVector -Matrix::postMultiplyWithColumn(ArrayRef colVec) const { +template +SmallVector Matrix::postMultiplyWithColumn(ArrayRef colVec) const { assert(getNumColumns() == colVec.size() && "Invalid column vector dimension!"); @@ -250,8 +287,9 @@ Matrix::postMultiplyWithColumn(ArrayRef colVec) const { /// sourceCol. This brings M(row, targetCol) to the range [0, M(row, /// sourceCol)). Apply the same column operation to otherMatrix, with the same /// integer multiple. 
-static void modEntryColumnOperation(Matrix &m, unsigned row, unsigned sourceCol, - unsigned targetCol, Matrix &otherMatrix) { +static void modEntryColumnOperation(Matrix &m, unsigned row, + unsigned sourceCol, unsigned targetCol, + Matrix &otherMatrix) { assert(m(row, sourceCol) != 0 && "Cannot divide by zero!"); assert(m(row, sourceCol) > 0 && "Source must be positive!"); MPInt ratio = -floorDiv(m(row, targetCol), m(row, sourceCol)); @@ -259,7 +297,8 @@ static void modEntryColumnOperation(Matrix &m, unsigned row, unsigned sou otherMatrix.addToColumn(sourceCol, targetCol, ratio); } -template void Matrix::print(raw_ostream &os) const { +template +void Matrix::print(raw_ostream &os) const { for (unsigned row = 0; row < nRows; ++row) { for (unsigned column = 0; column < nColumns; ++column) os << at(row, column) << ' '; @@ -267,9 +306,13 @@ template void Matrix::print(raw_ostream &os) const { } } -template void Matrix::dump() const { print(llvm::errs()); } +template +void Matrix::dump() const { + print(llvm::errs()); +} -template bool Matrix::hasConsistentState() const { +template +bool Matrix::hasConsistentState() const { if (data.size() != nRows * nReservedColumns) return false; if (nColumns > nReservedColumns) @@ -287,8 +330,8 @@ namespace mlir { namespace presburger { template class Matrix; template class Matrix; -} -} +} // namespace presburger +} // namespace mlir IntMatrix IntMatrix::identity(unsigned dimension) { IntMatrix matrix(dimension, dimension); @@ -297,7 +340,6 @@ IntMatrix IntMatrix::identity(unsigned dimension) { return matrix; } - std::pair IntMatrix::computeHermiteNormalForm() const { // We start with u as an identity matrix and perform operations on h until h // is in hermite normal form. 
We apply the same sequence of operations on u to diff --git a/mlir/lib/Analysis/Presburger/PresburgerRelation.cpp b/mlir/lib/Analysis/Presburger/PresburgerRelation.cpp index 0b3f6a3912885..5a9cf71fc8679 100644 --- a/mlir/lib/Analysis/Presburger/PresburgerRelation.cpp +++ b/mlir/lib/Analysis/Presburger/PresburgerRelation.cpp @@ -43,7 +43,7 @@ void PresburgerRelation::convertVarKind(VarKind srcKind, unsigned srcPos, unsigned num, VarKind dstKind, unsigned dstPos) { assert(srcKind != VarKind::Local && dstKind != VarKind::Local && - "srcKind/dstKind cannot be local"); + "srcKind/dstKind cannot be local"); assert(srcKind != dstKind && "cannot convert variables to the same kind"); assert(srcPos + num <= space.getNumVarKind(srcKind) && "invalid range for source variables"); @@ -636,17 +636,13 @@ bool PresburgerRelation::isPlainEqual(const PresburgerRelation &set) const { /// one unconstrained disjunct, indicating the absence of constraints or /// conditions. bool PresburgerRelation::isPlainUniverse() const { - for (auto &disjunct : getAllDisjuncts()) { - if (disjunct.getNumConstraints() == 0) - return true; - } - return false; + return llvm::any_of(getAllDisjuncts(), [](const IntegerRelation &disjunct) { + return disjunct.getNumConstraints() == 0; + }); } bool PresburgerRelation::isConvexNoLocals() const { - if (getNumDisjuncts() == 1 && getSpace().getNumLocalVars() == 0) - return true; - return false; + return getNumDisjuncts() == 1 && getSpace().getNumLocalVars() == 0; } /// Return true if there is no disjunct, false otherwise. 
@@ -823,8 +819,8 @@ PresburgerRelation SetCoalescer::coalesce() { } PresburgerRelation newSet = PresburgerRelation::getEmpty(space); - for (unsigned i = 0, e = disjuncts.size(); i < e; ++i) - newSet.unionInPlace(disjuncts[i]); + for (const IntegerRelation &disjunct : disjuncts) + newSet.unionInPlace(disjunct); return newSet; } diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp index e7fd2843b93a3..9aef2f5de1093 100644 --- a/mlir/lib/Analysis/Presburger/Utils.cpp +++ b/mlir/lib/Analysis/Presburger/Utils.cpp @@ -502,8 +502,8 @@ void DivisionRepr::print(raw_ostream &os) const { os << "Dividends:\n"; dividends.print(os); os << "Denominators\n"; - for (unsigned i = 0, e = denoms.size(); i < e; ++i) - os << denoms[i] << " "; + for (const MPInt &denom : denoms) + os << denom << " "; os << "\n"; } From afdb18df4d43d94225a941056e4fe02fbb6e8c93 Mon Sep 17 00:00:00 2001 From: JolantaJensen Date: Fri, 13 Oct 2023 12:10:21 +0100 Subject: [PATCH 071/720] [NFC][AArch64][LV] Reorganise LV tests using symbols from SLEEF (#68207) The tests introduced by https://reviews.llvm.org/D134719 and later modified in https://reviews.llvm.org/D146839 are not testing LV in isolation. This patch: 1. Assures that all tests test LV in isolation. 2. Adds LV tests using llvm intrinsics that have libm mappings. llrint, llround and lrint are not included as currently IR verifier pass does not allow to use vector types with them. 
--- .../AArch64/sleef-calls-aarch64.ll | 934 +++++++----- .../AArch64/sleef-intrinsic-calls-aarch64.ll | 1311 +++++++++++++++++ 2 files changed, 1890 insertions(+), 355 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll index d25e24efd5a23..d7dc122edaf7e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll @@ -1,21 +1,26 @@ -; Do NOT use -O3. It will lower exp2 to ldexp, and the test will fail. -; RUN: opt -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s --check-prefixes=CHECK,NEON -; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s --check-prefixes=CHECK,SVE +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(_)|(cos|exp[^e]|fmod|gamma|log|pow|sin|sqrt|tan)|(ret)" --version 2 +; RUN: opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=NEON +; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SVE target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -declare double @acos(double) #0 -declare float @acosf(float) #0 -declare double @llvm.acos.f64(double) #0 -declare float @llvm.acos.f32(float) #0 +declare double @acos(double) +declare float @acosf(float) define void @acos_f64(double* nocapture %varray) { - ; CHECK-LABEL: @acos_f64( - 
; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_acos( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @acos_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @acos(double [[CONV:%.*]]) #[[ATTR0:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @acos_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0:[0-9]+]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_acos( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @acos(double [[CONV:%.*]]) #[[ATTR2:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -35,11 +40,18 @@ define void @acos_f64(double* nocapture %varray) { } define void @acos_f32(float* nocapture %varray) { - ; CHECK-LABEL: @acos_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_acosf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @acos_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @acosf(float [[CONV:%.*]]) #[[ATTR1:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @acos_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_acosf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @acosf(float [[CONV:%.*]]) #[[ATTR3:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -58,17 +70,22 @@ define void @acos_f32(float* nocapture %varray) { ret void } -declare double @asin(double) #0 -declare float @asinf(float) #0 -declare double @llvm.asin.f64(double) #0 -declare float 
@llvm.asin.f32(float) #0 +declare double @asin(double) +declare float @asinf(float) define void @asin_f64(double* nocapture %varray) { - ; CHECK-LABEL: @asin_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_asin( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @asin_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @asin(double [[CONV:%.*]]) #[[ATTR2:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @asin_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_asin( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @asin(double [[CONV:%.*]]) #[[ATTR4:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -88,11 +105,18 @@ define void @asin_f64(double* nocapture %varray) { } define void @asin_f32(float* nocapture %varray) { - ; CHECK-LABEL: @asin_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_asinf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @asin_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @asinf(float [[CONV:%.*]]) #[[ATTR3:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @asin_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_asinf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @asinf(float [[CONV:%.*]]) #[[ATTR5:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -111,17 +135,22 @@ define void @asin_f32(float* 
nocapture %varray) { ret void } -declare double @atan(double) #0 -declare float @atanf(float) #0 -declare double @llvm.atan.f64(double) #0 -declare float @llvm.atan.f32(float) #0 +declare double @atan(double) +declare float @atanf(float) define void @atan_f64(double* nocapture %varray) { - ; CHECK-LABEL: @atan_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_atan( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @atan_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @atan(double [[CONV:%.*]]) #[[ATTR4:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @atan_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atan( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @atan(double [[CONV:%.*]]) #[[ATTR6:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -141,11 +170,18 @@ define void @atan_f64(double* nocapture %varray) { } define void @atan_f32(float* nocapture %varray) { - ; CHECK-LABEL: @atan_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_atanf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @atan_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @atanf(float [[CONV:%.*]]) #[[ATTR5:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @atan_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atanf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = 
tail call float @atanf(float [[CONV:%.*]]) #[[ATTR7:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -164,17 +200,22 @@ define void @atan_f32(float* nocapture %varray) { ret void } -declare double @atan2(double, double) #0 -declare float @atan2f(float, float) #0 -declare double @llvm.atan2.f64(double, double) #0 -declare float @llvm.atan2.f32(float, float) #0 +declare double @atan2(double, double) +declare float @atan2f(float, float) define void @atan2_f64(double* nocapture %varray) { - ; CHECK-LABEL: @atan2_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxvv_atan2( [[TMP4:%.*]], [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @atan2_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call double @atan2(double [[CONV:%.*]], double [[CONV]]) #[[ATTR6:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @atan2_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_atan2( [[TMP11:%.*]], [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @atan2(double [[CONV:%.*]], double [[CONV]]) #[[ATTR8:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -194,11 +235,18 @@ define void @atan2_f64(double* nocapture %varray) { } define void @atan2_f32(float* nocapture %varray) { - ; CHECK-LABEL: @atan2_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxvv_atan2f( [[TMP4:%.*]], [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @atan2_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> 
[[TMP1:%.*]], <4 x float> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call float @atan2f(float [[CONV:%.*]], float [[CONV]]) #[[ATTR7:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @atan2_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_atan2f( [[TMP11:%.*]], [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @atan2f(float [[CONV:%.*]], float [[CONV]]) #[[ATTR9:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -217,17 +265,22 @@ define void @atan2_f32(float* nocapture %varray) { ret void } -declare double @atanh(double) #0 -declare float @atanhf(float) #0 -declare double @llvm.atanh.f64(double) #0 -declare float @llvm.atanh.f32(float) #0 +declare double @atanh(double) +declare float @atanhf(float) define void @atanh_f64(double* nocapture %varray) { - ; CHECK-LABEL: @atanh_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atanh(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_atanh( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @atanh_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_atanh(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @atanh(double [[CONV:%.*]]) #[[ATTR8:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @atanh_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atanh( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @atanh(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -247,11 +300,18 @@ define void @atanh_f64(double* nocapture %varray) { } define void @atanh_f32(float* nocapture %varray) { - ; CHECK-LABEL: @atanh_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanhf(<4 x float> 
[[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_atanhf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @atanh_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @atanhf(float [[CONV:%.*]]) #[[ATTR9:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @atanh_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atanhf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @atanhf(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -270,17 +330,22 @@ define void @atanh_f32(float* nocapture %varray) { ret void } -declare double @cos(double) #0 -declare float @cosf(float) #0 -declare double @llvm.cos.f64(double) #0 -declare float @llvm.cos.f32(float) #0 +declare double @cos(double) +declare float @cosf(float) define void @cos_f64(double* nocapture %varray) { - ; CHECK-LABEL: @cos_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_cos( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @cos_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @cos(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @cos_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cos( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @cos(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -300,11 +365,18 @@ define void @cos_f64(double* nocapture %varray) { 
} define void @cos_f32(float* nocapture %varray) { - ; CHECK-LABEL: @cos_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_cosf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @cos_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @cosf(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @cos_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cosf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @cosf(float [[CONV:%.*]]) #[[ATTR13:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -323,17 +395,22 @@ define void @cos_f32(float* nocapture %varray) { ret void } -declare double @cosh(double) #0 -declare float @coshf(float) #0 -declare double @llvm.cosh.f64(double) #0 -declare float @llvm.cosh.f32(float) #0 +declare double @cosh(double) +declare float @coshf(float) define void @cosh_f64(double* nocapture %varray) { - ; CHECK-LABEL: @cosh_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_cosh( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @cosh_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @cosh(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @cosh_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cosh( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @cosh(double 
[[CONV:%.*]]) #[[ATTR14:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -353,11 +430,18 @@ define void @cosh_f64(double* nocapture %varray) { } define void @cosh_f32(float* nocapture %varray) { - ; CHECK-LABEL: @cosh_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_coshf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @cosh_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @coshf(float [[CONV:%.*]]) #[[ATTR13:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @cosh_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_coshf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @coshf(float [[CONV:%.*]]) #[[ATTR15:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -376,17 +460,22 @@ define void @cosh_f32(float* nocapture %varray) { ret void } -declare double @exp(double) #0 -declare float @expf(float) #0 -declare double @llvm.exp.f64(double) #0 -declare float @llvm.exp.f32(float) #0 +declare double @exp(double) +declare float @expf(float) define void @exp_f64(double* nocapture %varray) { - ; CHECK-LABEL: @exp_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_exp( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @exp_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @exp(double [[CONV:%.*]]) #[[ATTR14:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @exp_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp( 
[[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @exp(double [[CONV:%.*]]) #[[ATTR16:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -406,11 +495,18 @@ define void @exp_f64(double* nocapture %varray) { } define void @exp_f32(float* nocapture %varray) { - ; CHECK-LABEL: @exp_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_expf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @exp_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @expf(float [[CONV:%.*]]) #[[ATTR15:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @exp_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_expf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @expf(float [[CONV:%.*]]) #[[ATTR17:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -429,17 +525,22 @@ define void @exp_f32(float* nocapture %varray) { ret void } -declare double @exp2(double) #0 -declare float @exp2f(float) #0 -declare double @llvm.exp2.f64(double) #0 -declare float @llvm.exp2.f32(float) #0 +declare double @exp2(double) +declare float @exp2f(float) define void @exp2_f64(double* nocapture %varray) { - ; CHECK-LABEL: @exp2_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_exp2( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @exp2_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @exp2(double [[CONV:%.*]]) #[[ATTR16:[0-9]+]] +; NEON: ret 
void +; +; SVE-LABEL: define void @exp2_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp2( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @exp2(double [[CONV:%.*]]) #[[ATTR18:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -459,11 +560,18 @@ define void @exp2_f64(double* nocapture %varray) { } define void @exp2_f32(float* nocapture %varray) { - ; CHECK-LABEL: @exp2_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_exp2f( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @exp2_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @exp2f(float [[CONV:%.*]]) #[[ATTR17:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @exp2_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp2f( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @exp2f(float [[CONV:%.*]]) #[[ATTR19:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -482,17 +590,22 @@ define void @exp2_f32(float* nocapture %varray) { ret void } -declare double @exp10(double) #0 -declare float @exp10f(float) #0 -declare double @llvm.exp10.f64(double) #0 -declare float @llvm.exp10.f32(float) #0 +declare double @exp10(double) +declare float @exp10f(float) define void @exp10_f64(double* nocapture %varray) { - ; CHECK-LABEL: @exp10_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_exp10( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @exp10_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: 
[[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @exp10(double [[CONV:%.*]]) #[[ATTR18:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @exp10_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp10( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @exp10(double [[CONV:%.*]]) #[[ATTR20:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -512,11 +625,18 @@ define void @exp10_f64(double* nocapture %varray) { } define void @exp10_f32(float* nocapture %varray) { - ; CHECK-LABEL: @exp10_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_exp10f( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @exp10_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @exp10f(float [[CONV:%.*]]) #[[ATTR19:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @exp10_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp10f( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @exp10f(float [[CONV:%.*]]) #[[ATTR21:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -535,14 +655,25 @@ define void @exp10_f32(float* nocapture %varray) { ret void } -declare double @fmod(double, double) #0 -declare float @fmodf(float, float) #0 +; There are no TLI mappings to fixed vector functions for fmod and fmodf. 
+ +declare double @fmod(double, double) +declare float @fmodf(float, float) define void @fmod_f64(double* nocapture %varray) { - ; CHECK-LABEL: @fmod_f64( - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxvv_fmod( [[TMP4:%.*]], [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @fmod_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP3:%.*]] = tail call double @fmod(double [[TMP2:%.*]], double [[TMP2]]) #[[ATTR20:[0-9]+]] +; NEON: [[TMP5:%.*]] = tail call double @fmod(double [[TMP4:%.*]], double [[TMP4]]) #[[ATTR20]] +; NEON: [[CALL:%.*]] = tail call double @fmod(double [[CONV:%.*]], double [[CONV]]) #[[ATTR20]] +; NEON: ret void +; +; SVE-LABEL: define void @fmod_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_fmod( [[TMP11:%.*]], [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @fmod(double [[CONV:%.*]], double [[CONV]]) #[[ATTR22:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -562,10 +693,19 @@ define void @fmod_f64(double* nocapture %varray) { } define void @fmod_f32(float* nocapture %varray) { - ; CHECK-LABEL: @fmod_f32( - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxvv_fmodf( [[TMP4:%.*]], [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @fmod_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP3:%.*]] = tail call float @fmodf(float [[TMP2:%.*]], float [[TMP2]]) #[[ATTR21:[0-9]+]] +; NEON: [[TMP5:%.*]] = tail call float @fmodf(float [[TMP4:%.*]], float [[TMP4]]) #[[ATTR21]] +; NEON: [[CALL:%.*]] = tail call float @fmodf(float [[CONV:%.*]], float [[CONV]]) #[[ATTR21]] +; NEON: ret void +; +; SVE-LABEL: define void @fmod_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_fmodf( [[TMP11:%.*]], [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call 
float @fmodf(float [[CONV:%.*]], float [[CONV]]) #[[ATTR23:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -584,17 +724,22 @@ define void @fmod_f32(float* nocapture %varray) { ret void } -declare double @lgamma(double) #0 -declare float @lgammaf(float) #0 -declare double @llvm.lgamma.f64(double) #0 -declare float @llvm.lgamma.f32(float) #0 +declare double @lgamma(double) +declare float @lgammaf(float) define void @lgamma_f64(double* nocapture %varray) { - ; CHECK-LABEL: @lgamma_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_lgamma( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @lgamma_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @lgamma(double [[CONV:%.*]]) #[[ATTR22:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @lgamma_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_lgamma( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @lgamma(double [[CONV:%.*]]) #[[ATTR24:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -614,11 +759,18 @@ define void @lgamma_f64(double* nocapture %varray) { } define void @lgamma_f32(float* nocapture %varray) { - ; CHECK-LABEL: @lgamma_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_lgammaf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @lgamma_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @lgammaf(float [[CONV:%.*]]) #[[ATTR23:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @lgamma_f32 +; 
SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_lgammaf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @lgammaf(float [[CONV:%.*]]) #[[ATTR25:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -637,17 +789,22 @@ define void @lgamma_f32(float* nocapture %varray) { ret void } -declare double @log10(double) #0 -declare float @log10f(float) #0 -declare double @llvm.log10.f64(double) #0 -declare float @llvm.log10.f32(float) #0 +declare double @log10(double) +declare float @log10f(float) define void @log10_f64(double* nocapture %varray) { - ; CHECK-LABEL: @log10_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_log10( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @log10_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @log10(double [[CONV:%.*]]) #[[ATTR24:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @log10_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log10( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @log10(double [[CONV:%.*]]) #[[ATTR26:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -667,11 +824,18 @@ define void @log10_f64(double* nocapture %varray) { } define void @log10_f32(float* nocapture %varray) { - ; CHECK-LABEL: @log10_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_log10f( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @log10_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> 
@_ZGVnN4v_log10f(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @log10f(float [[CONV:%.*]]) #[[ATTR25:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @log10_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log10f( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @log10f(float [[CONV:%.*]]) #[[ATTR27:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -690,17 +854,22 @@ define void @log10_f32(float* nocapture %varray) { ret void } -declare double @log2(double) #0 -declare float @log2f(float) #0 -declare double @llvm.log2.f64(double) #0 -declare float @llvm.log2.f32(float) #0 +declare double @log2(double) +declare float @log2f(float) define void @log2_f64(double* nocapture %varray) { - ; CHECK-LABEL: @log2_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_log2( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @log2_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @log2(double [[CONV:%.*]]) #[[ATTR26:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @log2_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log2( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @log2(double [[CONV:%.*]]) #[[ATTR28:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -720,11 +889,18 @@ define void @log2_f64(double* nocapture %varray) { } define void @log2_f32(float* nocapture %varray) { - ; CHECK-LABEL: @log2_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_log2f( 
[[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @log2_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @log2f(float [[CONV:%.*]]) #[[ATTR27:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @log2_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log2f( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @log2f(float [[CONV:%.*]]) #[[ATTR29:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -743,17 +919,22 @@ define void @log2_f32(float* nocapture %varray) { ret void } -declare double @log(double) #0 -declare float @logf(float) #0 -declare double @llvm.log.f64(double) #0 -declare float @llvm.log.f32(float) #0 +declare double @log(double) +declare float @logf(float) define void @log_f64(double* nocapture %varray) { - ; CHECK-LABEL: @log_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_log( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @log_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @log(double [[CONV:%.*]]) #[[ATTR28:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @log_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @log(double [[CONV:%.*]]) #[[ATTR30:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -773,11 +954,18 @@ define void @log_f64(double* nocapture %varray) { } define void @log_f32(float* nocapture %varray) { - ; CHECK-LABEL: 
@log_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_logf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @log_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @logf(float [[CONV:%.*]]) #[[ATTR29:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @log_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_logf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @logf(float [[CONV:%.*]]) #[[ATTR31:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -796,17 +984,22 @@ define void @log_f32(float* nocapture %varray) { ret void } -declare double @pow(double, double) #0 -declare float @powf(float, float) #0 -declare double @llvm.pow.f64(double, double) #0 -declare float @llvm.pow.f32(float, float) #0 +declare double @pow(double, double) +declare float @powf(float, float) define void @pow_f64(double* nocapture %varray) { - ; CHECK-LABEL: @pow_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxvv_pow( [[TMP4:%.*]], [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @pow_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call double @pow(double [[CONV:%.*]], double [[CONV]]) #[[ATTR30:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @pow_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_pow( [[TMP11:%.*]], [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, 
zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @pow(double [[CONV:%.*]], double [[CONV]]) #[[ATTR32:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -826,11 +1019,18 @@ define void @pow_f64(double* nocapture %varray) { } define void @pow_f32(float* nocapture %varray) { - ; CHECK-LABEL: @pow_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxvv_powf( [[TMP4:%.*]], [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @pow_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call float @powf(float [[CONV:%.*]], float [[CONV]]) #[[ATTR31:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @pow_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_powf( [[TMP11:%.*]], [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @powf(float [[CONV:%.*]], float [[CONV]]) #[[ATTR33:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -849,17 +1049,22 @@ define void @pow_f32(float* nocapture %varray) { ret void } -declare double @sin(double) #0 -declare float @sinf(float) #0 -declare double @llvm.sin.f64(double) #0 -declare float @llvm.sin.f32(float) #0 +declare double @sin(double) +declare float @sinf(float) define void @sin_f64(double* nocapture %varray) { - ; CHECK-LABEL: @sin_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sin( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @sin_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @sin(double 
[[CONV:%.*]]) #[[ATTR32:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @sin_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sin( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @sin(double [[CONV:%.*]]) #[[ATTR34:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -879,11 +1084,18 @@ define void @sin_f64(double* nocapture %varray) { } define void @sin_f32(float* nocapture %varray) { - ; CHECK-LABEL: @sin_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sinf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @sin_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @sinf(float [[CONV:%.*]]) #[[ATTR33:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @sin_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sinf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @sinf(float [[CONV:%.*]]) #[[ATTR35:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -902,17 +1114,22 @@ define void @sin_f32(float* nocapture %varray) { ret void } -declare double @sinh(double) #0 -declare float @sinhf(float) #0 -declare double @llvm.sinh.f64(double) #0 -declare float @llvm.sinh.f32(float) #0 +declare double @sinh(double) +declare float @sinhf(float) define void @sinh_f64(double* nocapture %varray) { - ; CHECK-LABEL: @sinh_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sinh( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @sinh_f64 +; NEON-SAME: (ptr nocapture 
[[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @sinh(double [[CONV:%.*]]) #[[ATTR34:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @sinh_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sinh( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @sinh(double [[CONV:%.*]]) #[[ATTR36:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -932,11 +1149,18 @@ define void @sinh_f64(double* nocapture %varray) { } define void @sinh_f32(float* nocapture %varray) { - ; CHECK-LABEL: @sinh_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sinhf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @sinh_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @sinhf(float [[CONV:%.*]]) #[[ATTR35:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @sinh_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sinhf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @sinhf(float [[CONV:%.*]]) #[[ATTR37:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -955,17 +1179,22 @@ define void @sinh_f32(float* nocapture %varray) { ret void } -declare double @sqrt(double) #0 -declare float @sqrtf(float) #0 -declare double @llvm.sqrt.f64(double) #0 -declare float @llvm.sqrt.f32(float) #0 +declare double @sqrt(double) +declare float @sqrtf(float) define void @sqrt_f64(double* nocapture %varray) { - ; CHECK-LABEL: @sqrt_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> 
[[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sqrt( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @sqrt_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @sqrt(double [[CONV:%.*]]) #[[ATTR36:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @sqrt_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sqrt( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @sqrt(double [[CONV:%.*]]) #[[ATTR38:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -985,11 +1214,18 @@ define void @sqrt_f64(double* nocapture %varray) { } define void @sqrt_f32(float* nocapture %varray) { - ; CHECK-LABEL: @sqrt_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sqrtf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @sqrt_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @sqrtf(float [[CONV:%.*]]) #[[ATTR37:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @sqrt_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sqrtf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @sqrtf(float [[CONV:%.*]]) #[[ATTR39:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -1008,65 +1244,22 @@ define void @sqrt_f32(float* nocapture %varray) { ret void } -define void @llvm_sqrt_f64(double* nocapture %varray) { - ; CHECK-LABEL: @llvm_sqrt_f64( - ; NEON: [[TMP5:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP4:%.*]]) - ; SVE: 
[[TMP5:%.*]] = call fast @llvm.sqrt.nxv2f64( [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %tmp = trunc i64 %iv to i32 - %conv = sitofp i32 %tmp to double - %call = tail call fast double @llvm.sqrt.f64(double %conv) - %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv - store double %call, double* %arrayidx, align 8 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -define void @llvm_sqrt_f32(float* nocapture %varray) { - ; CHECK-LABEL: @llvm_sqrt_f32( - ; NEON: [[TMP5:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call fast @llvm.sqrt.nxv4f32( [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %tmp = trunc i64 %iv to i32 - %conv = sitofp i32 %tmp to float - %call = tail call fast float @llvm.sqrt.f32(float %conv) - %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv - store float %call, float* %arrayidx, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -declare double @tan(double) #0 -declare float @tanf(float) #0 -declare double @llvm.tan.f64(double) #0 -declare float @llvm.tan.f32(float) #0 +declare double @tan(double) +declare float @tanf(float) define void @tan_f64(double* nocapture %varray) { - ; CHECK-LABEL: @tan_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tan( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @tan_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail 
call double @tan(double [[CONV:%.*]]) #[[ATTR38:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @tan_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tan( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @tan(double [[CONV:%.*]]) #[[ATTR40:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -1086,11 +1279,18 @@ define void @tan_f64(double* nocapture %varray) { } define void @tan_f32(float* nocapture %varray) { - ; CHECK-LABEL: @tan_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tanf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @tan_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @tanf(float [[CONV:%.*]]) #[[ATTR39:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @tan_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tanf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @tanf(float [[CONV:%.*]]) #[[ATTR41:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -1109,17 +1309,22 @@ define void @tan_f32(float* nocapture %varray) { ret void } -declare double @tanh(double) #0 -declare float @tanhf(float) #0 -declare double @llvm.tanh.f64(double) #0 -declare float @llvm.tanh.f32(float) #0 +declare double @tanh(double) +declare float @tanhf(float) define void @tanh_f64(double* nocapture %varray) { - ; CHECK-LABEL: @tanh_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tanh( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @tanh_f64 +; 
NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @tanh(double [[CONV:%.*]]) #[[ATTR40:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @tanh_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tanh( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @tanh(double [[CONV:%.*]]) #[[ATTR42:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -1139,11 +1344,18 @@ define void @tanh_f64(double* nocapture %varray) { } define void @tanh_f32(float* nocapture %varray) { - ; CHECK-LABEL: @tanh_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tanhf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @tanh_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @tanhf(float [[CONV:%.*]]) #[[ATTR41:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @tanh_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tanhf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @tanhf(float [[CONV:%.*]]) #[[ATTR43:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -1162,17 +1374,22 @@ define void @tanh_f32(float* nocapture %varray) { ret void } -declare double @tgamma(double) #0 -declare float @tgammaf(float) #0 -declare double @llvm.tgamma.f64(double) #0 -declare float @llvm.tgamma.f32(float) #0 +declare double @tgamma(double) +declare float @tgammaf(float) define void @tgamma_f64(double* nocapture %varray) { - ; CHECK-LABEL: @tgamma_f64( - ; NEON: [[TMP5:%.*]] = call <2 x 
double> @_ZGVnN2v_tgamma(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tgamma( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @tgamma_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @tgamma(double [[CONV:%.*]]) #[[ATTR42:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @tgamma_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tgamma( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @tgamma(double [[CONV:%.*]]) #[[ATTR44:[0-9]+]] +; SVE: ret void +; entry: br label %for.body @@ -1192,11 +1409,18 @@ define void @tgamma_f64(double* nocapture %varray) { } define void @tgamma_f32(float* nocapture %varray) { - ; CHECK-LABEL: @tgamma_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tgammaf( [[TMP4:%.*]], {{.*}}) - ; CHECK: ret void - ; +; NEON-LABEL: define void @tgamma_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @tgammaf(float [[CONV:%.*]]) #[[ATTR43:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @tgamma_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tgammaf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @tgammaf(float [[CONV:%.*]]) #[[ATTR45:[0-9]+]] +; SVE: ret void +; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll new file mode 100644 index 
0000000000000..715c2c352b776 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll @@ -0,0 +1,1311 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(\.|_)(ceil|copysign|cos|exp[^e]|exp2|fabs|floor|fma|log|m..num|pow|.*int|round|sin|sqrt|trunc)|(ret)" --version 2 +; RUN: opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=NEON +; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SVE + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + + +; Tests are checking if LV can vectorize loops with llvm math intrinsics using mappings +; from TLI (if such mappings exist) for scalable and fixed width vectors. + +declare double @llvm.ceil.f64(double) +declare float @llvm.ceil.f32(float) + +define void @llvm_ceil_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_ceil_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.ceil.f64(double [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_ceil_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1:[0-9]+]] { +; SVE: [[TMP12:%.*]] = call @llvm.ceil.nxv2f64( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.ceil.f64(double [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.ceil.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw 
nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_ceil_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_ceil_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.ceil.f32(float [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_ceil_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.ceil.nxv4f32( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.ceil.f32(float [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.ceil.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.copysign.f64(double, double) +declare float @llvm.copysign.f32(float, float) + +define void @llvm_copysign_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_copysign_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.copysign.f64(double [[CONV:%.*]], double [[CONV]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_copysign_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.copysign.nxv2f64( [[TMP11:%.*]], [[TMP11]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.copysign.f64(double [[CONV:%.*]], double [[CONV]]) +; SVE: ret void +; + 
entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.copysign.f64(double %conv, double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_copysign_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_copysign_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.copysign.f32(float [[CONV:%.*]], float [[CONV]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_copysign_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.copysign.nxv4f32( [[TMP11:%.*]], [[TMP11]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.copysign.f32(float [[CONV:%.*]], float [[CONV]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.copysign.f32(float %conv, float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.cos.f64(double) +declare float @llvm.cos.f32(float) + +define void @llvm_cos_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_cos_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP1:%.*]]) +; 
NEON: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV:%.*]]) #[[ATTR1:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_cos_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cos( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV:%.*]]) #[[ATTR4:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.cos.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_cos_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_cos_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[CONV:%.*]]) #[[ATTR2:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_cos_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cosf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[CONV:%.*]]) #[[ATTR5:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.cos.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add 
nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.exp.f64(double) +declare float @llvm.exp.f32(float) + +define void @llvm_exp_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_exp_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[CONV:%.*]]) #[[ATTR3:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_exp_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[CONV:%.*]]) #[[ATTR6:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.exp.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_exp_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_exp_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[CONV:%.*]]) #[[ATTR4:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_exp_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_expf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: 
[[CALL:%.*]] = tail call float @llvm.exp.f32(float [[CONV:%.*]]) #[[ATTR7:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.exp.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.exp2.f64(double) +declare float @llvm.exp2.f32(float) + +define void @llvm_exp2_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_exp2_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[CONV:%.*]]) #[[ATTR5:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_exp2_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp2( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[CONV:%.*]]) #[[ATTR8:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.exp2.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_exp2_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_exp2_f32 +; NEON-SAME: (ptr nocapture 
[[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[CONV:%.*]]) #[[ATTR6:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_exp2_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp2f( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[CONV:%.*]]) #[[ATTR9:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.exp2.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.fabs.f64(double) +declare float @llvm.fabs.f32(float) + +define void @llvm_fabs_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_fabs_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.fabs.f64(double [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_fabs_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.fabs.nxv2f64( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.fabs.f64(double [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.fabs.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* 
%varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + + +define void @llvm_fabs_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_fabs_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.fabs.f32(float [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_fabs_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.fabs.nxv4f32( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.fabs.f32(float [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.fabs.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.floor.f64(double) +declare float @llvm.floor.f32(float) + +define void @llvm_floor_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_floor_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.floor.f64(double [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_floor_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.floor.nxv2f64( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.floor.f64(double [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label 
%for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.floor.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_floor_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_floor_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.floor.f32(float [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_floor_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.floor.nxv4f32( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.floor.f32(float [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.floor.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.fma.f64(double, double, double) +declare float @llvm.fma.f32(float, float, float) + +define void @llvm_fma_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_fma_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]], <2 x double> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.fma.f64(double 
[[CONV:%.*]], double [[CONV]], double [[CONV]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_fma_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.fma.nxv2f64( [[TMP11:%.*]], [[TMP11]], [[TMP11]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.fma.f64(double [[CONV:%.*]], double [[CONV]], double [[CONV]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.fma.f64(double %conv, double %conv, double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_fma_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_fma_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]], <4 x float> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.fma.f32(float [[CONV:%.*]], float [[CONV]], float [[CONV]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_fma_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.fma.nxv4f32( [[TMP11:%.*]], [[TMP11]], [[TMP11]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.fma.f32(float [[CONV:%.*]], float [[CONV]], float [[CONV]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.fma.f32(float %conv, float %conv, float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 
1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.log.f64(double) +declare float @llvm.log.f32(float) + +define void @llvm_log_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_log_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR7:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_log_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_log_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_log_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR8:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_log_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_logf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail 
call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.log10.f64(double) +declare float @llvm.log10.f32(float) + +define void @llvm_log10_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_log10_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR9:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_log10_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log10( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log10.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_log10_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_log10_f32 +; NEON-SAME: (ptr nocapture 
[[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[CONV:%.*]]) #[[ATTR10:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_log10_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log10f( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[CONV:%.*]]) #[[ATTR13:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log10.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.log2.f64(double) +declare float @llvm.log2.f32(float) + +define void @llvm_log2_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_log2_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[CONV:%.*]]) #[[ATTR11:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_log2_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log2( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[CONV:%.*]]) #[[ATTR14:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 
%tmp to double + %call = tail call double @llvm.log2.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_log2_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_log2_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[CONV:%.*]]) #[[ATTR12:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_log2_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log2f( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[CONV:%.*]]) #[[ATTR15:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log2.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.maxnum.f64(double, double) +declare float @llvm.maxnum.f32(float, float) + +define void @llvm_maxnum_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_maxnum_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.maxnum.f64(double [[CONV:%.*]], double [[CONV]]) +; NEON: 
ret void +; +; SVE-LABEL: define void @llvm_maxnum_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.maxnum.nxv2f64( [[TMP11:%.*]], [[TMP11]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.maxnum.f64(double [[CONV:%.*]], double [[CONV]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.maxnum.f64(double %conv, double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_maxnum_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_maxnum_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.maxnum.f32(float [[CONV:%.*]], float [[CONV]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_maxnum_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.maxnum.nxv4f32( [[TMP11:%.*]], [[TMP11]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.maxnum.f32(float [[CONV:%.*]], float [[CONV]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.maxnum.f32(float %conv, float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double 
@llvm.minnum.f64(double, double) +declare float @llvm.minnum.f32(float, float) + +define void @llvm_minnum_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_minnum_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.minnum.f64(double [[CONV:%.*]], double [[CONV]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_minnum_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.minnum.nxv2f64( [[TMP11:%.*]], [[TMP11]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.minnum.f64(double [[CONV:%.*]], double [[CONV]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.minnum.f64(double %conv, double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_minnum_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_minnum_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.minnum.f32(float [[CONV:%.*]], float [[CONV]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_minnum_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.minnum.nxv4f32( [[TMP11:%.*]], [[TMP11]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.minnum.f32(float [[CONV:%.*]], float [[CONV]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], 
[ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.minnum.f32(float %conv, float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.nearbyint.f64(double) +declare float @llvm.nearbyint.f32(float) + +define void @llvm_nearbyint_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_nearbyint_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.nearbyint.f64(double [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_nearbyint_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.nearbyint.nxv2f64( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.nearbyint.f64(double [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.nearbyint.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_nearbyint_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_nearbyint_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.nearbyint.f32(float [[CONV:%.*]]) +; NEON: ret void +; +; 
SVE-LABEL: define void @llvm_nearbyint_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.nearbyint.nxv4f32( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.nearbyint.f32(float [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.nearbyint.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.pow.f64(double, double) +declare float @llvm.pow.f32(float, float) + +define void @llvm_pow_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_pow_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.pow.f64(double [[CONV:%.*]], double [[CONV]]) #[[ATTR13:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_pow_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_pow( [[TMP11:%.*]], [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @llvm.pow.f64(double [[CONV:%.*]], double [[CONV]]) #[[ATTR16:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.pow.f64(double %conv, double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw 
i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_pow_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_pow_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.pow.f32(float [[CONV:%.*]], float [[CONV]]) #[[ATTR14:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_pow_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_powf( [[TMP11:%.*]], [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @llvm.pow.f32(float [[CONV:%.*]], float [[CONV]]) #[[ATTR17:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.pow.f32(float %conv, float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.rint.f64(double) +declare float @llvm.rint.f32(float) + +define void @llvm_rint_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_rint_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.rint.f64(double [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_rint_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.rint.nxv2f64( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call 
double @llvm.rint.f64(double [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.rint.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_rint_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_rint_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.rint.f32(float [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_rint_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.rint.nxv4f32( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.rint.f32(float [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.rint.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.round.f64(double) +declare float @llvm.round.f32(float) + +define void @llvm_round_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_round_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double 
@llvm.round.f64(double [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_round_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.round.nxv2f64( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.round.f64(double [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.round.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_round_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_round_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.round.f32(float [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_round_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.round.nxv4f32( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.round.f32(float [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.round.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.sin.f64(double) +declare float @llvm.sin.f32(float) + +define void 
@llvm_sin_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_sin_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV:%.*]]) #[[ATTR15:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_sin_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sin( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV:%.*]]) #[[ATTR18:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.sin.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_sin_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_sin_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[CONV:%.*]]) #[[ATTR16:[0-9]+]] +; NEON: ret void +; +; SVE-LABEL: define void @llvm_sin_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sinf( [[TMP11:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE: [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[CONV:%.*]]) #[[ATTR19:[0-9]+]] +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 
+ %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.sin.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.sqrt.f64(double) +declare float @llvm.sqrt.f32(float) + +define void @llvm_sqrt_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_sqrt_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.sqrt.f64(double [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_sqrt_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.sqrt.nxv2f64( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.sqrt.f64(double [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.sqrt.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_sqrt_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_sqrt_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.sqrt.f32(float [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_sqrt_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.sqrt.nxv4f32( 
[[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.sqrt.f32(float [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.sqrt.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.trunc.f64(double) +declare float @llvm.trunc.f32(float) + +define void @llvm_trunc_f64(double* nocapture %varray) { +; NEON-LABEL: define void @llvm_trunc_f64 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call double @llvm.trunc.f64(double [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_trunc_f64 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.trunc.nxv2f64( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call double @llvm.trunc.f64(double [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.trunc.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_trunc_f32(float* nocapture %varray) { +; NEON-LABEL: define void @llvm_trunc_f32 +; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) { +; NEON: [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> 
[[TMP1:%.*]]) +; NEON: [[CALL:%.*]] = tail call float @llvm.trunc.f32(float [[CONV:%.*]]) +; NEON: ret void +; +; SVE-LABEL: define void @llvm_trunc_f32 +; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] { +; SVE: [[TMP12:%.*]] = call @llvm.trunc.nxv4f32( [[TMP11:%.*]]) +; SVE: [[CALL:%.*]] = tail call float @llvm.trunc.f32(float [[CONV:%.*]]) +; SVE: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.trunc.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} From dbb9faedec5e28ab3f584f5e14d31e475ac268ac Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 13 Oct 2023 08:38:43 +0100 Subject: [PATCH 072/720] Re-apply '[AArch64] Enable "sink-and-fold" in MachineSink by default (#67432)' This re-applies commit a9d0ab2ee572f179f80483f3ebbbcdd03c3b4481, which was reverted by 8abb2ace888bdd04a1bdb4ac2f2fc25d57a5760a. 
The issue was fixed by 7510f32f906ab4e583542eae2611b020f88629af --- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 2 +- llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll | 3 +-- llvm/test/CodeGen/AArch64/sink-and-fold.ll | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 3d818c76bd4b7..fcc30a7cfceaf 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -200,7 +200,7 @@ static cl::opt EnableGISelLoadStoreOptPostLegal( static cl::opt EnableSinkFold("aarch64-enable-sink-fold", cl::desc("Enable sinking and folding of instruction copies"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() { // Register the target. diff --git a/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll b/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll index ad6fdb6f1f9b9..ce000021fb29b 100644 --- a/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll +++ b/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll @@ -22,8 +22,7 @@ define i32 @nsis_BZ2_bzDecompress(ptr %pos.i, i1 %cmp661.not3117.i, i1 %exitcond ; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: mov x9, xzr ; CHECK-NEXT: ldrb w9, [x9] -; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: strb wzr, [x9] +; CHECK-NEXT: strb wzr, [x0, x9] ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_4: // %for.end677.i ; CHECK-NEXT: mov w0, wzr diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll index 632fdb3910531..52007221e12a7 100644 --- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll +++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -aarch64-enable-sink-fold=true < %s | FileCheck 
%s +; RUN: llc < %s | FileCheck %s target triple = "aarch64-linux" declare i32 @use(...) From 5d35273a32d239b7407338e13ed71b59174d6536 Mon Sep 17 00:00:00 2001 From: Mikhail Goncharov Date: Fri, 13 Oct 2023 13:53:31 +0200 Subject: [PATCH 073/720] [lldb] fix release build (#68979) due to 64d78d8b3cd09dff32c97fbefa56bcfc8b676406 that used side effects in assert() --- lldb/source/Commands/CommandObjectTarget.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 0c378b069086d..7c20893db243c 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -2682,12 +2682,14 @@ class CommandObjectTargetModulesDumpSeparateDebugInfoFiles llvm::StringRef type; llvm::StringRef symfile; StructuredData::Array *files; - assert(separate_debug_info_list->GetValueForKeyAsString("type", - type)); - assert(separate_debug_info_list->GetValueForKeyAsString("symfile", - symfile)); - assert(separate_debug_info_list->GetValueForKeyAsArray( - "separate-debug-info-files", files)); + if (!(separate_debug_info_list->GetValueForKeyAsString("type", + type) && + separate_debug_info_list->GetValueForKeyAsString("symfile", + symfile) && + separate_debug_info_list->GetValueForKeyAsArray( + "separate-debug-info-files", files))) { + assert(false); + } strm << "Symbol file: " << symfile; strm.EOL(); From 8d59fc5fd1599bd7153817d2af903ae9a6103343 Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Fri, 13 Oct 2023 08:57:53 +0000 Subject: [PATCH 074/720] Fix typo in CMake var --- mlir/docs/Dialects/Linalg/OpDSL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/docs/Dialects/Linalg/OpDSL.md b/mlir/docs/Dialects/Linalg/OpDSL.md index b2868efa092ca..5c4c8b4e1880a 100644 --- a/mlir/docs/Dialects/Linalg/OpDSL.md +++ b/mlir/docs/Dialects/Linalg/OpDSL.md @@ -12,7 +12,7 @@ corresponding `linalg.generic` IR for 
the composition. The tool is bundled with the MLIR Python bindings. To use from the CMake build tree, MLIR must be build with Python bindings enabled -(`-DMLIR_ENALBE_BINDINGS_PYTHON=ON`). Then add the `python` directory in the +(`-DMLIR_ENABLE_BINDINGS_PYTHON=ON`). Then add the `python` directory in the build tree to your `PYTHONPATH` environment variable (i.e. `export PYTHONPATH=$PWD/build/tools/mlir/python_packages/mlir_core`). Optionally, use an installed MLIR package, if available, to avoid building. From 3efa4794ecd5ca6235f9f7e3fc83a8d9e59b66c9 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Fri, 13 Oct 2023 14:47:46 +0200 Subject: [PATCH 075/720] [clang][Interp] Support AddOffset with 128bit offsets (#68679) We do a similar thing a few lines above for `Index`: ```c++ // Get a version of the index comparable to the type. T Index = T::from(Ptr.getIndex(), Offset.bitWidth()); ``` --- clang/lib/AST/Interp/Boolean.h | 1 + clang/lib/AST/Interp/Integral.h | 3 +++ clang/lib/AST/Interp/IntegralAP.h | 3 +++ clang/lib/AST/Interp/Interp.h | 4 ++-- clang/test/AST/Interp/intap.cpp | 10 ++++++++++ 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/Interp/Boolean.h b/clang/lib/AST/Interp/Boolean.h index c3ed3d61f76ca..336f7941dfc47 100644 --- a/clang/lib/AST/Interp/Boolean.h +++ b/clang/lib/AST/Interp/Boolean.h @@ -42,6 +42,7 @@ class Boolean final { bool operator>(unsigned RHS) const { return static_cast(V) > RHS; } Boolean operator-() const { return Boolean(V); } + Boolean operator-(const Boolean &Other) const { return Boolean(V - Other.V); } Boolean operator~() const { return Boolean(true); } explicit operator int8_t() const { return V; } diff --git a/clang/lib/AST/Interp/Integral.h b/clang/lib/AST/Interp/Integral.h index 4dbe9c9bcb14b..cc1cab8f39fb1 100644 --- a/clang/lib/AST/Interp/Integral.h +++ b/clang/lib/AST/Interp/Integral.h @@ -88,6 +88,9 @@ template class Integral final { } Integral operator-() const { return Integral(-V); } + Integral 
operator-(const Integral &Other) const { + return Integral(V - Other.V); + } Integral operator~() const { return Integral(~V); } template diff --git a/clang/lib/AST/Interp/IntegralAP.h b/clang/lib/AST/Interp/IntegralAP.h index f9a33bbcd7bd7..f17fb8e484415 100644 --- a/clang/lib/AST/Interp/IntegralAP.h +++ b/clang/lib/AST/Interp/IntegralAP.h @@ -59,6 +59,9 @@ template class IntegralAP final { IntegralAP() : V(APSInt::getMaxValue(1024, Signed)) {} IntegralAP operator-() const { return IntegralAP(-V); } + IntegralAP operator-(const IntegralAP &Other) const { + return IntegralAP(V - Other.V); + } bool operator>(IntegralAP RHS) const { return V > RHS.V; } bool operator>=(IntegralAP RHS) const { return V >= RHS.V; } bool operator<(IntegralAP RHS) const { return V < RHS.V; } diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index 1ad3b8bfc7711..e3e6a4cec63b1 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -1421,7 +1421,7 @@ bool OffsetHelper(InterpState &S, CodePtr OpPC, const T &Offset, // Get a version of the index comparable to the type. T Index = T::from(Ptr.getIndex(), Offset.bitWidth()); // Compute the largest index into the array. - unsigned MaxIndex = Ptr.getNumElems(); + T MaxIndex = T::from(Ptr.getNumElems(), Offset.bitWidth()); // Helper to report an invalid offset, computed as APSInt. auto InvalidOffset = [&]() { @@ -1437,7 +1437,7 @@ bool OffsetHelper(InterpState &S, CodePtr OpPC, const T &Offset, return false; }; - unsigned MaxOffset = MaxIndex - Ptr.getIndex(); + T MaxOffset = T::from(MaxIndex - Index, Offset.bitWidth()); if constexpr (Op == ArithOp::Add) { // If the new offset would be negative, bail out. 
if (Offset.isNegative() && (Offset.isMin() || -Offset > Index)) diff --git a/clang/test/AST/Interp/intap.cpp b/clang/test/AST/Interp/intap.cpp index 8fe65a69a4fee..f9cbc698a3290 100644 --- a/clang/test/AST/Interp/intap.cpp +++ b/clang/test/AST/Interp/intap.cpp @@ -90,4 +90,14 @@ namespace i128 { // expected-error {{must be initialized by a constant expression}} \ // expected-note {{is outside the range of representable values of type}} } + +namespace AddSubOffset { + constexpr __int128 A = 1; + constexpr int arr[] = {1,2,3}; + constexpr const int *P = arr + A; + static_assert(*P == 2, ""); + constexpr const int *P2 = P - A; + static_assert(*P2 == 1,""); +} + #endif From 3f4bf998e897274758006f8423f2bdcd68cb2d55 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 13 Oct 2023 09:11:34 -0400 Subject: [PATCH 076/720] [gn] port 46cb8d9a3252 --- llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn index 7abf53f94d8f4..ce5f863158820 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn @@ -127,6 +127,8 @@ target(tsan_target_type, "rtl") { sources += [ "tsan_rtl_mips64.S" ] } else if (target_cpu == "powerpc64") { sources += [ "tsan_rtl_ppc64.S" ] + } else if (target_cpu == "riscv64") { + sources += [ "tsan_rtl_riscv64.S" ] } else if (target_cpu == "s390x") { sources += [ "tsan_rtl_s390x.S" ] } From e4e02e31c24fa15456d938e9e37ee54f8202c079 Mon Sep 17 00:00:00 2001 From: CarolineConcatto <51754594+CarolineConcatto@users.noreply.github.com> Date: Fri, 13 Oct 2023 14:25:42 +0100 Subject: [PATCH 077/720] =?UTF-8?q?[AArch64][NFC]=20Refactor=20NEON,=20SVE?= =?UTF-8?q?=20and=20SME=20classes=20and=20multiclasses=20fo=E2=80=A6=20(#6?= =?UTF-8?q?8800)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit …r the assembly disassembly This NFC patch refactors the assembly/disassembly class and multiclass in the AArch64 backend to receive a new 2023/09 AArch64[1] ISA release. The encoding for the 2023 instructions re-uses encoding blocks from previous assembly/disassembly instructions. The refactoring makes the class and multiclass for assembly/disassembly generic so it can be used to describe the instructions for the new ISA. [1]https://developer.arm.com/documentation/ddi0602/2023-09 --- .../lib/Target/AArch64/AArch64InstrFormats.td | 36 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 262 +++++++------- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 +- .../AArch64/AsmParser/AArch64AsmParser.cpp | 4 + .../MCTargetDesc/AArch64InstPrinter.cpp | 3 +- .../AArch64/MCTargetDesc/AArch64InstPrinter.h | 1 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 333 ++++++++++-------- llvm/lib/Target/AArch64/SVEInstrFormats.td | 31 +- 9 files changed, 363 insertions(+), 317 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 57d69ae05c47f..e5dbfa404b3c6 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1517,7 +1517,7 @@ def UImm3s8Operand : UImmScaledMemoryIndexed<3, 8>; def uimm3s8 : Operand, ImmLeaf= 0 && Imm <= 56 && ((Imm % 8) == 0); }], UImmS8XForm> { - let PrintMethod = "printVectorIndex<8>"; + let PrintMethod = "printMatrixIndex<8>"; let ParserMatchClass = UImm3s8Operand; } @@ -6011,11 +6011,11 @@ multiclass SIMDLogicalThreeVectorTied size, // ARMv8.2-A Dot Product Instructions (Vector): These instructions extract // bytes from S-sized elements. 
-class BaseSIMDThreeSameVectorDot sz, bits<4> opc, string asm, + string kind1, string kind2, RegisterOperand RegType, ValueType AccumType, ValueType InputType, SDPatternOperator OpNode> : - BaseSIMDThreeSameVectorTied { - def v8i8 : BaseSIMDThreeSameVectorDot<0, U, Mixed, asm, ".2s", ".8b", V64, + def v8i8 : BaseSIMDThreeSameVectorDot<0, U, 0b10, {0b001, Mixed}, asm, ".2s", ".8b", V64, v2i32, v8i8, OpNode>; - def v16i8 : BaseSIMDThreeSameVectorDot<1, U, Mixed, asm, ".4s", ".16b", V128, + def v16i8 : BaseSIMDThreeSameVectorDot<1, U, 0b10, {0b001, Mixed}, asm, ".4s", ".16b", V128, v4i32, v16i8, OpNode>; } @@ -8482,12 +8482,12 @@ class SIMDThreeSameVectorMatMul size, string asm, +class BaseSIMDThreeSameVectorIndexS size, bits<4> opc, string asm, string dst_kind, string lhs_kind, string rhs_kind, RegisterOperand RegType, ValueType AccumType, ValueType InputType, SDPatternOperator OpNode> : - BaseSIMDIndexedTied size, str multiclass SIMDThreeSameVectorDotIndex size, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, Mixed, size, asm, ".2s", ".8b", ".4b", + def v8i8 : BaseSIMDThreeSameVectorIndexS<0, U, size, {0b111, Mixed}, asm, ".2s", ".8b", ".4b", V64, v2i32, v8i8, OpNode>; - def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, Mixed, size, asm, ".4s", ".16b", ".4b", + def v16i8 : BaseSIMDThreeSameVectorIndexS<1, U, size, {0b111, Mixed}, asm, ".4s", ".16b", ".4b", V128, v4i32, v16i8, OpNode>; } // ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed) let mayRaiseFPException = 1, Uses = [FPCR] in -class BaseSIMDThreeSameVectorFMLIndex opc, string asm, +class BaseSIMDThreeSameVectorIndexH sz, bits<4> opc, string asm, string dst_kind, string lhs_kind, string rhs_kind, RegisterOperand RegType, - ValueType AccumType, ValueType InputType, - SDPatternOperator OpNode> : - BaseSIMDIndexedTied : + BaseSIMDIndexedTied opc, string asm, multiclass SIMDThreeSameVectorFMLIndex opc, string asm, SDPatternOperator OpNode> { - def v4f16 : 
BaseSIMDThreeSameVectorFMLIndex<0, U, opc, asm, ".2s", ".2h", ".h", - V64, v2f32, v4f16, OpNode>; - def v8f16 : BaseSIMDThreeSameVectorFMLIndex<1, U, opc, asm, ".4s", ".4h", ".h", - V128, v4f32, v8f16, OpNode>; + def v4f16 : BaseSIMDThreeSameVectorIndexH<0, U, 0b10, opc, asm, ".2s", ".2h", ".h", + V64, V128_lo, v2f32, v4f16, OpNode>; + def v8f16 : BaseSIMDThreeSameVectorIndexH<1, U, 0b10, opc, asm, ".4s", ".4h", ".h", + V128, V128_lo, v4f32, v8f16, OpNode>; } multiclass SIMDFPIndexed opc, string asm, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 5293df90b880b..df59dc4ad27fa 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1206,7 +1206,7 @@ defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_ne class BaseSIMDSUDOTIndex - : BaseSIMDThreeSameVectorDotIndex { let Pattern = [(set (AccumType RegType:$dst), diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index f306021dd7533..2685f2e3c8108 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -66,8 +66,8 @@ let Predicates = [HasSME] in { defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b000, "bfmopa", int_aarch64_sme_mopa_wide>; defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b001, "bfmops", int_aarch64_sme_mops_wide>; -defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa", int_aarch64_sme_mopa>; -defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops", int_aarch64_sme_mops>; +defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, 0b00, ZPR32, "fmopa", int_aarch64_sme_mopa>; +defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, 0b00, ZPR32, "fmops", int_aarch64_sme_mops>; } let Predicates = [HasSMEF64F64] in { @@ -216,29 +216,29 @@ def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 tim let Predicates = [HasSME2] in { defm ADD_VG2_M2ZZ_S : 
sme2_dot_mla_add_sub_array_vg2_single<"add", 0b0011010, MatrixOp32, ZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_add_write_single_za_vg1x2>; defm ADD_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"add", 0b0111010, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_add_write_single_za_vg1x4>; -defm ADD_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b011010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x2>; -defm ADD_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b011010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x4>; +defm ADD_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b0110010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x2>; +defm ADD_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b0110010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x4>; defm ADD_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"add", 0b0110000>; defm ADD_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"add", 0b0110000>; defm SUB_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"sub", 0b0011011, MatrixOp32, ZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_sub_write_single_za_vg1x2>; defm SUB_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b0111011, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_sub_write_single_za_vg1x4>; -defm SUB_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b011011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x2>; -defm SUB_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b011011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x4>; +defm SUB_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b0110011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x2>; +defm SUB_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b0110011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x4>; defm FMLA_VG2_M2ZZ_S : 
sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b0011000, MatrixOp32, ZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_single_vg1x2>; defm FMLA_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b0111000, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_single_vg1x4>; -defm FMLA_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b011000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x2>; -defm FMLA_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b011000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x4>; -defm FMLA_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmla", 0b0000, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x2>; +defm FMLA_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b0110000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x2>; +defm FMLA_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b0110000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x4>; +defm FMLA_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmla", 0b01, 0b0000, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x2>; defm FMLA_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmla", 0b0000, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x4>; defm FMLS_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b0011001, MatrixOp32, ZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_single_vg1x2>; defm FMLS_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b0111001, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_single_vg1x4>; -defm FMLS_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b011001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x2>; -defm FMLS_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b011001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x4>; -defm FMLS_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmls", 0b0010, ZZ_s_mul_r, ZPR4b32, nxv4f32, 
int_aarch64_sme_fmls_lane_vg1x2>; +defm FMLS_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b0110001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x2>; +defm FMLS_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0110001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x4>; +defm FMLS_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmls", 0b01, 0b0010, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x2>; defm FMLS_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmls", 0b0010, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x4>; defm ADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"add", 0b0010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_za32_vg1x2>; @@ -262,37 +262,37 @@ defm FMLAL_MZZI : sme2_mla_long_array_index<"fmlal", 0b10, 0b00, nxv8f16 defm FMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x2>; defm FMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x4>; defm FMLAL_MZZ : sme2_mla_long_array_single<"fmlal", 0b00, 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x1>; -defm FMLAL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x2>; -defm FMLAL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x4>; -defm FMLAL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_vg2x2>; -defm FMLAL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_vg2x4>; +defm FMLAL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlal_single_vg2x2>; +defm FMLAL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlal_single_vg2x4>; +defm FMLAL_VG2_M2Z2Z_HtoS : 
sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlal_vg2x2>; +defm FMLAL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlal_vg2x4>; defm FMLSL_MZZI : sme2_mla_long_array_index<"fmlsl", 0b10, 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x1>; defm FMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x2>; defm FMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x4>; defm FMLSL_MZZ : sme2_mla_long_array_single<"fmlsl", 0b00, 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x1>; -defm FMLSL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x2>; -defm FMLSL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x4>; -defm FMLSL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_vg2x2>; -defm FMLSL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_vg2x4>; +defm FMLSL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"fmlsl", 0b010, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x2>; +defm FMLSL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"fmlsl", 0b010, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x4>; +defm FMLSL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"fmlsl", 0b001, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlsl_vg2x2>; +defm FMLSL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"fmlsl", 0b001, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlsl_vg2x4>; defm BFMLAL_MZZI : sme2_mla_long_array_index<"bfmlal", 0b10, 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x1>; defm BFMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x2>; defm 
BFMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x4>; defm BFMLAL_MZZ : sme2_mla_long_array_single<"bfmlal", 0b00, 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x1>; -defm BFMLAL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x2>; -defm BFMLAL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x4>; -defm BFMLAL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_vg2x2>; -defm BFMLAL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_vg2x4>; +defm BFMLAL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"bfmlal", 0b100, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x2>; +defm BFMLAL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"bfmlal", 0b100, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x4>; +defm BFMLAL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"bfmlal", 0b010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlal_vg2x2>; +defm BFMLAL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"bfmlal", 0b010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlal_vg2x4>; defm BFMLSL_MZZI : sme2_mla_long_array_index<"bfmlsl", 0b10, 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x1>; defm BFMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x2>; defm BFMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x4>; defm BFMLSL_MZZ : sme2_mla_long_array_single<"bfmlsl", 0b00, 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x1>; -defm BFMLSL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x2>; -defm BFMLSL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"bfmlsl", 0b11, nxv8bf16, 
int_aarch64_sme_fmlsl_single_vg2x4>; -defm BFMLSL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_vg2x2>; -defm BFMLSL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_vg2x4>; +defm BFMLSL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"bfmlsl", 0b110, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x2>; +defm BFMLSL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"bfmlsl", 0b110, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x4>; +defm BFMLSL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"bfmlsl", 0b011, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlsl_vg2x2>; +defm BFMLSL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"bfmlsl", 0b011, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlsl_vg2x4>; defm SMLAL_MZZI : sme2_mla_long_array_index<"smlal", 0b11, 0b00, nxv8i16, int_aarch64_sme_smlal_lane_vg2x1>; defm SMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlal", 0b00, int_aarch64_sme_smlal_lane_vg2x2>; @@ -413,122 +413,122 @@ defm SCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"sclamp", 0b0>; defm UCLAMP_VG2_2Z2Z : sme2_int_clamp_vector_vg2_multi<"uclamp", 0b1>; defm UCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"uclamp", 0b1>; -defm FDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b1001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x2>; +defm FDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b1001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x2>; defm FDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b1001, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x4>; defm FDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"fdot", 0b0010000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x2>; defm FDOT_VG4_M4ZZ_HtoS : 
sme2_dot_mla_add_sub_array_vg4_single<"fdot", 0b0110000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x4>; -defm FDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b010000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x2>; -defm FDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b010000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x4>; +defm FDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x2>; +defm FDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x4>; -defm BFDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfdot", 0b1011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x2>; +defm BFDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfdot", 0b01, 0b1011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x2>; defm BFDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"bfdot", 0b1011, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x4>; defm BFDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"bfdot", 0b0010010, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x2>; defm BFDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"bfdot", 0b0110010, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x4>; -defm BFDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"bfdot", 0b010010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x2>; -defm BFDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"bfdot", 0b010010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x4>; +defm BFDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"bfdot", 0b0100010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x2>; +defm 
BFDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"bfdot", 0b0100010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x4>; -defm BFVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfvdot", 0b0011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fvdot_lane_za32_vg1x2>; +defm BFVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfvdot", 0b01, 0b0011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fvdot_lane_za32_vg1x2>; -defm FVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fvdot", 0b0001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fvdot_lane_za32_vg1x2>; +defm FVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fvdot", 0b01, 0b0001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fvdot_lane_za32_vg1x2>; -defm SDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b1000, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x2>; -defm SDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b1100, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x2>; +defm SDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b01, 0b1000, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x2>; +defm SDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b01, 0b1100, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x2>; defm SDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1000, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x4>; defm SDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x4>; defm SDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010101, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x2>; defm SDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110101, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x4>; -defm 
SDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b110101, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x2>; -defm SDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b110101, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x4>; +defm SDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b1101001, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x2>; +defm SDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b1101001, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x4>; defm SDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b0010100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x2>; defm SDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b0110100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x4>; -defm SDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b010100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x2>; -defm SDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b010100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x4>; +defm SDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b0101000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x2>; +defm SDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b0101000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x4>; -defm SUDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sudot", 0b1111, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x2>; +defm SUDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sudot", 0b01, 0b1111, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x2>; defm SUDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sudot", 0b1111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x4>; defm 
SUDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"sudot", 0b0010111, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x2>; defm SUDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg4_single<"sudot", 0b0110111, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x4>; -defm SVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"svdot", 0b0100, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za32_vg1x2>; +defm SVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"svdot", 0b01, 0b0100, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za32_vg1x2>; defm SVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"svdot", 0b0100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_svdot_lane_za32_vg1x4>; defm SUVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"suvdot", 0b0111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_suvdot_lane_za32_vg1x4>; -defm UDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b1010, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x2>; -defm UDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b1110, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x2>; +defm UDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b01, 0b1010, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x2>; +defm UDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b01, 0b1110, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x2>; defm UDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x4>; defm UDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1010, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x4>; defm UDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010111, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x2>; defm 
UDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110111, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x4>; -defm UDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110111, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x2>; -defm UDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110111, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x4>; +defm UDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b1101011, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x2>; +defm UDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b1101011, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x4>; defm UDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b0010110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x2>; defm UDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b0110110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x4>; -defm UDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b010110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x2>; -defm UDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b010110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x4>; +defm UDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b0101010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x2>; +defm UDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b0101010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x4>; -defm USDOT_VG2_M2ZZI_BToS: sme2_multi_vec_array_vg2_index_32b<"usdot", 0b1101, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x2>; +defm USDOT_VG2_M2ZZI_BToS: sme2_multi_vec_array_vg2_index_32b<"usdot", 0b01, 0b1101, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x2>; defm 
USDOT_VG4_M4ZZI_BToS: sme2_multi_vec_array_vg4_index_32b<"usdot", 0b1101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x4>; defm USDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"usdot", 0b0010101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x2>; defm USDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg4_single<"usdot", 0b0110101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x4>; -defm USDOT_VG2_M2Z2Z_BToS : sme2_dot_mla_add_sub_array_vg2_multi<"usdot", 0b010101, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x2>; -defm USDOT_VG4_M4Z4Z_BToS : sme2_dot_mla_add_sub_array_vg4_multi<"usdot", 0b010101, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x4>; +defm USDOT_VG2_M2Z2Z_BToS : sme2_dot_mla_add_sub_array_vg2_multi<"usdot", 0b0101001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x2>; +defm USDOT_VG4_M4Z4Z_BToS : sme2_dot_mla_add_sub_array_vg4_multi<"usdot", 0b0101001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x4>; defm USVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"usvdot", 0b0101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usvdot_lane_za32_vg1x4>; -defm UVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"uvdot", 0b0110, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za32_vg1x2>; +defm UVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"uvdot", 0b01, 0b0110, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za32_vg1x2>; defm UVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"uvdot", 0b0110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_uvdot_lane_za32_vg1x4>; -defm SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x1>; -defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x2>; -defm SMLALL_VG4_M4ZZI_BtoS : 
sme2_mla_ll_array_vg4_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x4>; -defm SMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"smlall", 0b0000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x1>; +defm SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b00, 0b000, int_aarch64_sme_smla_za32_lane_vg4x1>; +defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b00, 0b000, int_aarch64_sme_smla_za32_lane_vg4x2>; +defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b00, 0b0000, int_aarch64_sme_smla_za32_lane_vg4x4>; +defm SMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"smlall", 0b00000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x1>; defm SMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"smlall", 0b00000, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x2>; defm SMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"smlall", 0b01000, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x4>; -defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b0000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x2>; -defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b0000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x4>; +defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b00000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x2>; +defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b00000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x4>; -defm USMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"usmlall", 0b001, int_aarch64_sme_usmla_za32_lane_vg4x1>; -defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b100, int_aarch64_sme_usmla_za32_lane_vg4x2>; -defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b100, int_aarch64_sme_usmla_za32_lane_vg4x4>; -defm USMLALL_MZZ_BtoS 
: sme2_mla_ll_array_single<"usmlall", 0b0001, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x1>; +defm USMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"usmlall", 0b00, 0b001, int_aarch64_sme_usmla_za32_lane_vg4x1>; +defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b00, 0b100, int_aarch64_sme_usmla_za32_lane_vg4x2>; +defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b00, 0b0100, int_aarch64_sme_usmla_za32_lane_vg4x4>; +defm USMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"usmlall", 0b00001, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x1>; defm USMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"usmlall", 0b00001, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x2>; defm USMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"usmlall", 0b01001, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x4>; -defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b0001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x2>; -defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b0001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x4>; +defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b00001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x2>; +defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b00001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x4>; -defm SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b010, int_aarch64_sme_smls_za32_lane_vg4x1>; -defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b001, int_aarch64_sme_smls_za32_lane_vg4x2>; -defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b001, int_aarch64_sme_smls_za32_lane_vg4x4>; -defm SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"smlsll", 0b0010, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, 
int_aarch64_sme_smls_za32_single_vg4x1>; +defm SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b00, 0b010, int_aarch64_sme_smls_za32_lane_vg4x1>; +defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b00, 0b001, int_aarch64_sme_smls_za32_lane_vg4x2>; +defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b00, 0b0001, int_aarch64_sme_smls_za32_lane_vg4x4>; +defm SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"smlsll", 0b00010, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x1>; defm SMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"smlsll", 0b00010, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x2>; defm SMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"smlsll", 0b01010, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x4>; -defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b0010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x2>; -defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b0010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x4>; +defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b00010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x2>; +defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b00010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x4>; -defm UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b100, int_aarch64_sme_umla_za32_lane_vg4x1>; -defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b010, int_aarch64_sme_umla_za32_lane_vg4x2>; -defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b010, int_aarch64_sme_umla_za32_lane_vg4x4>; -defm UMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"umlall", 0b0100, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x1>; +defm UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b00, 0b100, 
int_aarch64_sme_umla_za32_lane_vg4x1>; +defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b00, 0b010, int_aarch64_sme_umla_za32_lane_vg4x2>; +defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b00, 0b0010, int_aarch64_sme_umla_za32_lane_vg4x4>; +defm UMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"umlall", 0b00100, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x1>; defm UMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"umlall", 0b00100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x2>; defm UMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"umlall", 0b01100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x4>; -defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b0100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x2>; -defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b0100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x4>; +defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b00100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x2>; +defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b00100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x4>; -defm SUMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"sumlall", 0b101, int_aarch64_sme_sumla_za32_lane_vg4x1>; -defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b110, int_aarch64_sme_sumla_za32_lane_vg4x2>; -defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b110, int_aarch64_sme_sumla_za32_lane_vg4x4>; +defm SUMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"sumlall", 0b00, 0b101, int_aarch64_sme_sumla_za32_lane_vg4x1>; +defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b00, 0b110, int_aarch64_sme_sumla_za32_lane_vg4x2>; +defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b00, 
0b0110, int_aarch64_sme_sumla_za32_lane_vg4x4>; defm SUMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"sumlall", 0b00101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x2>; defm SUMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"sumlall", 0b01101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x4>; -defm UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b110, int_aarch64_sme_umls_za32_lane_vg4x1>; -defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b011, int_aarch64_sme_umls_za32_lane_vg4x2>; -defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b011, int_aarch64_sme_umls_za32_lane_vg4x4>; -defm UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"umlsll", 0b0110, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x1>; +defm UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b00, 0b110, int_aarch64_sme_umls_za32_lane_vg4x1>; +defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b00, 0b011, int_aarch64_sme_umls_za32_lane_vg4x2>; +defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b00, 0b0011, int_aarch64_sme_umls_za32_lane_vg4x4>; +defm UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"umlsll", 0b00110, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x1>; defm UMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"umlsll", 0b00110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x2>; defm UMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"umlsll", 0b01110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x4>; -defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b0110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x2>; -defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b0110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x4>; +defm UMLSLL_VG2_M2Z2Z_BtoS : 
sme2_mla_ll_array_vg2_multi<"umlsll", 0b00110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x2>; +defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b00110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x4>; defm BMOPA_MPPZZ_S : sme2_int_bmopx_tile<"bmopa", 0b100, int_aarch64_sme_bmopa_za32>; defm BMOPS_MPPZZ_S : sme2_int_bmopx_tile<"bmops", 0b101, int_aarch64_sme_bmops_za32>; @@ -674,13 +674,13 @@ defm STNT1D_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b1 let Predicates = [HasSME2, HasSMEI16I64] in { defm ADD_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"add", 0b1011010, MatrixOp64, ZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_add_write_single_za_vg1x2>; defm ADD_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"add", 0b1111010, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_add_write_single_za_vg1x4>; -defm ADD_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b111010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x2>; -defm ADD_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b111010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x4>; +defm ADD_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b1110010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x2>; +defm ADD_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b1110010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x4>; defm SUB_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"sub", 0b1011011, MatrixOp64, ZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_sub_write_single_za_vg1x2>; defm SUB_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b1111011, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_sub_write_single_za_vg1x4>; -defm SUB_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b111011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x2>; -defm SUB_VG4_M4Z4Z_D : 
sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b111011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x4>; +defm SUB_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b1110011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x2>; +defm SUB_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b1110011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x4>; defm ADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"add", 0b1010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x2>; defm ADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"add", 0b1010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x4>; @@ -692,8 +692,8 @@ defm SDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"sdot", 0b01, ZZ_h defm SDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"sdot", 0b001, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za64_vg1x4>; defm SDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x2>; defm SDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x4>; -defm SDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b110100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x2>; -defm SDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b110100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x4>; +defm SDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b1101000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x2>; +defm SDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b1101000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x4>; defm SVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"svdot", 0b101, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, 
int_aarch64_sme_svdot_lane_za64_vg1x4>; @@ -701,46 +701,46 @@ defm UDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"udot", 0b11, ZZ_h defm UDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"udot", 0b011, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za64_vg1x4>; defm UDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x2>; defm UDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x4>; -defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x2>; -defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x4>; +defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b1101010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x2>; +defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b1101010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x4>; defm UVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"uvdot", 0b111, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za64_vg1x4>; defm SMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x1>; defm SMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x2>; defm SMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x4>; -defm SMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"smlall", 0b1000, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x1>; +defm SMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"smlall", 0b10000, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x1>; 
defm SMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"smlall", 0b10000, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x2>; defm SMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"smlall", 0b11000, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x4>; -defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall", 0b1000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x2>; -defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall", 0b1000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x4>; +defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall", 0b10000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x2>; +defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall", 0b10000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x4>; defm SMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x1>; defm SMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x2>; defm SMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x4>; -defm SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"smlsll", 0b1010, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x1>; +defm SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"smlsll", 0b10010, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x1>; defm SMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"smlsll", 0b10010, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x2>; defm SMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"smlsll", 0b11010, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x4>; -defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll", 0b1010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x2>; -defm SMLSLL_VG4_M4Z4Z_HtoD : 
sme2_mla_ll_array_vg4_multi<"smlsll", 0b1010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x4>; +defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll", 0b10010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x2>; +defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlsll", 0b10010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x4>; defm UMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x1>; defm UMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x2>; defm UMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x4>; -defm UMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"umlall", 0b1100, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x1>; +defm UMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"umlall", 0b10100, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x1>; defm UMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"umlall", 0b10100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x2>; defm UMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"umlall", 0b11100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x4>; -defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall", 0b1100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x2>; -defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall", 0b1100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x4>; +defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall", 0b10100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x2>; +defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall", 0b10100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x4>; defm UMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlsll", 0b11, 
int_aarch64_sme_umls_za64_lane_vg4x1>; defm UMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x2>; defm UMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x4>; -defm UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"umlsll", 0b1110, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x1>; +defm UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"umlsll", 0b10110, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x1>; defm UMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"umlsll", 0b10110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x2>; defm UMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"umlsll", 0b11110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x4>; -defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll", 0b1110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x2>; -defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll", 0b1110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x4>; +defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll", 0b10110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x2>; +defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll", 0b10110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x4>; } let Predicates = [HasSME2, HasSMEF64F64] in { @@ -748,15 +748,15 @@ defm FMLA_VG2_M2ZZI_D : sme2_multi_vec_array_vg2_index_64b<"fmla", 0b00, ZZ_d_mu defm FMLA_VG4_M4ZZI_D : sme2_multi_vec_array_vg4_index_64b<"fmla", 0b000, ZZZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_lane_vg1x4>; defm FMLA_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b1011000, MatrixOp64, ZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_single_vg1x2>; defm FMLA_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b1111000, MatrixOp64, ZZZZ_d, ZPR4b64, 
nxv2f64, int_aarch64_sme_fmla_single_vg1x4>; -defm FMLA_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b111000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x2>; -defm FMLA_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b111000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x4>; +defm FMLA_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b1110000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x2>; +defm FMLA_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b1110000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x4>; defm FMLS_VG2_M2ZZI_D : sme2_multi_vec_array_vg2_index_64b<"fmls", 0b10, ZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_lane_vg1x2>; defm FMLS_VG4_M4ZZI_D : sme2_multi_vec_array_vg4_index_64b<"fmls", 0b010, ZZZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_lane_vg1x4>; defm FMLS_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b1011001, MatrixOp64, ZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_single_vg1x2>; defm FMLS_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b1111001, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_single_vg1x4>; -defm FMLS_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b111001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x2>; -defm FMLS_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b111001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x4>; +defm FMLS_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b1110001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x2>; +defm FMLS_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b1110001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x4>; defm FADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fadd", 0b1000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x2>; defm FADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fadd", 0b1000, MatrixOp64, 
ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x4>; @@ -787,25 +787,25 @@ defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16 defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; -defm FMLA_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmla", 0b00>; -defm FMLA_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmla", 0b00>; +defm FMLA_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmla", 0b00, 0b100, ZZ_h_mul_r, ZPR4b16>; +defm FMLA_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmla", 0b000, ZZZZ_h_mul_r, ZPR4b16>; defm FMLA_VG2_M2ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmla", 0b0011100, MatrixOp16, ZZ_h, ZPR4b16>; defm FMLA_VG4_M4ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmla", 0b0111100, MatrixOp16, ZZZZ_h, ZPR4b16>; -defm FMLA_VG2_M2Z4Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b010001, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; -defm FMLA_VG4_M4Z4Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b010001, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; +defm FMLA_VG2_M2Z4Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b0100001, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; +defm FMLA_VG4_M4Z4Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b0100001, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; -defm FMLS_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmls", 0b01>; -defm FMLS_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmls", 0b01>; +defm FMLS_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmls", 0b00, 0b101, ZZ_h_mul_r, ZPR4b16>; +defm FMLS_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmls", 0b001, ZZZZ_h_mul_r, ZPR4b16>; defm FMLS_VG2_M2ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmls", 0b0011101, MatrixOp16, ZZ_h, ZPR4b16>; defm FMLS_VG4_M4ZZ_H : 
sme2_dot_mla_add_sub_array_vg24_single<"fmls", 0b0111101, MatrixOp16, ZZZZ_h, ZPR4b16>; -defm FMLS_VG2_M2Z2Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b010011, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; -defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b010011, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; +defm FMLS_VG2_M2Z2Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b0100011, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; +defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>; defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>; -defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0>; -defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1>; +defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; +defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; } let Predicates = [HasSME2p1, HasB16B16] in { @@ -814,19 +814,19 @@ defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; -defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b10>; -defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b10>; +defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b00, 0b110, ZZ_h_mul_r, ZPR4b16>; +defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b010, ZZZZ_h_mul_r, ZPR4b16>; defm BFMLA_VG2_M2ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmla", 0b1011100, MatrixOp16, ZZ_h, ZPR4b16>; defm BFMLA_VG4_M4ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmla", 0b1111100, MatrixOp16, ZZZZ_h, ZPR4b16>; -defm BFMLA_VG2_M2Z2Z : 
sme2_dot_mla_add_sub_array_vg2_multi<"bfmla", 0b110001, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; -defm BFMLA_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmla", 0b110001, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFMLA_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmla", 0b1100001, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFMLA_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmla", 0b1100001, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; -defm BFMLS_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmls", 0b11>; -defm BFMLS_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmls", 0b11>; +defm BFMLS_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmls", 0b00, 0b111, ZZ_h_mul_r, ZPR4b16>; +defm BFMLS_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmls", 0b011, ZZZZ_h_mul_r, ZPR4b16>; defm BFMLS_VG2_M2ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmls", 0b1011101, MatrixOp16, ZZ_h, ZPR4b16>; defm BFMLS_VG4_M4ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmls", 0b1111101, MatrixOp16, ZZZZ_h, ZPR4b16>; -defm BFMLS_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmls", 0b110011, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; -defm BFMLS_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmls", 0b110011, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFMLS_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmls", 0b1100011, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFMLS_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmls", 0b1100011, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; defm BFMAX_VG2_2ZZ : sme2p1_bf_max_min_vector_vg2_single<"bfmax", 0b0010000>; @@ -852,6 +852,6 @@ defm BFMINNM_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfminnm", 0b0010011 defm BFCLAMP_VG2_2ZZZ: sme2p1_bfclamp_vector_vg2_multi<"bfclamp">; defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">; -defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0>; -defm BFMOPS_MPPZZ_H : 
sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1>; +defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, ZPR16>; +defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, ZPR16>; } diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index a7a64c6b20d84..752f58596a2f0 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2204,8 +2204,8 @@ let Predicates = [HasSVEorSME] in { } // End HasSVEorSME let Predicates = [HasBF16, HasSVEorSME] in { - defm BFDOT_ZZZ : sve_float_dot<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>; - defm BFDOT_ZZI : sve_float_dot_indexed<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>; + defm BFDOT_ZZZ : sve_float_dot<0b1, 0b0, ZPR32, ZPR16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>; + defm BFDOT_ZZI : sve_float_dot_indexed<0b1, 0b00, ZPR16, ZPR3b16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>; } // End HasBF16, HasSVEorSME let Predicates = [HasBF16, HasSVE] in { @@ -3753,8 +3753,8 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>; let Predicates = [HasSVE2p1_or_HasSME2] in { defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>; -defm FDOT_ZZZ_S : sve_float_dot<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>; -defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>; +defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>; +defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>; def BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb">; def BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt">; def BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb">; diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index ce59cf8dba957..ae5ba6b13a1bd 100644 
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -4537,6 +4537,8 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) { // Check if register is followed by an index if (parseOptionalToken(AsmToken::LBrac)) { + Operands.push_back( + AArch64Operand::CreateToken("[", getLoc(), getContext())); const MCExpr *ImmVal; if (getParser().parseExpression(ImmVal)) return ParseStatus::NoMatch; @@ -4549,6 +4551,8 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) { Operands.push_back(AArch64Operand::CreateImm( MCConstantExpr::create(MCE->getValue(), getContext()), StartLoc, getLoc(), getContext())); + Operands.push_back( + AArch64Operand::CreateToken("]", getLoc(), getContext())); } return ParseStatus::Success; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 5273a02f18404..988c78699179f 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1756,10 +1756,11 @@ void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, O << "[" << Scale * MI->getOperand(OpNum).getImm() << "]"; } +template void AArch64InstPrinter::printMatrixIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { - O << MI->getOperand(OpNum).getImm(); + O << Scale * MI->getOperand(OpNum).getImm(); } void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address, diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index c4c83f0f25adc..9dccdf42361b2 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -173,6 +173,7 @@ class AArch64InstPrinter : public MCInstPrinter { template void 
printVectorIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template void printMatrixIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printAdrAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index edd24b4a849b5..823115c7d0250 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -230,7 +230,7 @@ def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>; // SME Outer Products //===----------------------------------------------------------------------===// -class sme_fp_outer_product_inst sz, bit op, MatrixTileOperand za_ty, +class sme_fp_outer_product_inst sz, bits<2> op, MatrixTileOperand za_ty, ZPRRegOp zpr_ty, string mnemonic> : I<(outs za_ty:$ZAda), (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), @@ -242,7 +242,7 @@ class sme_fp_outer_product_inst sz, bit op, MatrixTileOperand za_ bits<3> Pn; bits<5> Zn; let Inst{31-25} = 0b1000000; - let Inst{24} = op; + let Inst{24} = op{1}; let Inst{23} = 0b1; let Inst{22-21} = sz; let Inst{20-16} = Zm; @@ -250,25 +250,25 @@ class sme_fp_outer_product_inst sz, bit op, MatrixTileOperand za_ let Inst{12-10} = Pn; let Inst{9-5} = Zn; let Inst{4} = S; - let Inst{3} = op; + let Inst{3} = op{0}; let Constraints = "$ZAda = $_ZAda"; } -multiclass sme_outer_product_fp32 { - def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { +multiclass sme_outer_product_fp32 sz, ZPRRegOp zpr_ty, string mnemonic, SDPatternOperator op> { + def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { bits<2> ZAda; let Inst{1-0} = ZAda; let Inst{2} = 0b0; } - def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } multiclass sme_outer_product_fp64 { - def NAME : 
sme_fp_outer_product_inst, SMEPseudo2Instr { + def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { bits<3> ZAda; let Inst{2-0} = ZAda; } @@ -278,8 +278,8 @@ multiclass sme_outer_product_fp64 def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } -multiclass sme2p1_fmop_tile_fp16{ - def NAME : sme_fp_outer_product_inst { +multiclass sme2p1_fmop_tile_fp16 op, ZPRRegOp zpr_ty>{ + def NAME : sme_fp_outer_product_inst { bits<1> ZAda; let Inst{2-1} = 0b00; let Inst{0} = ZAda; @@ -1449,7 +1449,7 @@ multiclass sme2_dot_mla_add_sub_array_vg4_single op, //===----------------------------------------------------------------------===// // SME2 multiple vectors ternary INT/FP two and four registers -class sme2_dot_mla_add_sub_array_vg2_multi op, +class sme2_dot_mla_add_sub_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, string mnemonic> @@ -1463,20 +1463,19 @@ class sme2_dot_mla_add_sub_array_vg2_multi op, bits<2> Rv; bits<3> imm3; let Inst{31-23} = 0b110000011; - let Inst{22} = op{5}; //sz + let Inst{22} = op{6}; //sz let Inst{21} = 0b1; let Inst{20-17} = Zm; let Inst{16-15} = 0b00; let Inst{14-13} = Rv; - let Inst{12-10} = op{4-2}; + let Inst{12-10} = op{5-3}; let Inst{9-6} = Zn; - let Inst{5} = 0b0; - let Inst{4-3} = op{1-0}; + let Inst{5-3} = op{2-0}; let Inst{2-0} = imm3; let Constraints = "$ZAd = $_ZAd"; } -multiclass sme2_dot_mla_add_sub_array_vg2_multi op, +multiclass sme2_dot_mla_add_sub_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic> { @@ -1490,7 +1489,7 @@ multiclass sme2_dot_mla_add_sub_array_vg2_multi op, (!cast(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; } -class sme2_dot_mla_add_sub_array_vg4_multi op, +class sme2_dot_mla_add_sub_array_vg4_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, string mnemonic> @@ -1504,20 +1503,20 @@ class sme2_dot_mla_add_sub_array_vg4_multi op, 
bits<2> Rv; bits<3> imm3; let Inst{31-23} = 0b110000011; - let Inst{22} = op{5}; //sz + let Inst{22} = op{6}; //sz let Inst{21} = 0b1; let Inst{20-18} = Zm; let Inst{17-15} = 0b010; let Inst{14-13} = Rv; - let Inst{12-10} = op{4-2}; + let Inst{12-10} = op{5-3}; let Inst{9-7} = Zn; - let Inst{6-5} = 0b00; - let Inst{4-3} = op{1-0}; + let Inst{6} = 0b0; + let Inst{5-3} = op{2-0}; let Inst{2-0} = imm3; let Constraints = "$ZAd = $_ZAd"; } -multiclass sme2_dot_mla_add_sub_array_vg4_multi op, +multiclass sme2_dot_mla_add_sub_array_vg4_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic>{ @@ -1794,8 +1793,8 @@ class sme2_mla_long_array_index_base op0, bits<2> op, Operand index_ty, } multiclass sme2_mla_long_array_index op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_index_base, SMEPseudo2Instr { + def _HtoS : sme2_mla_long_array_index_base, SMEPseudo2Instr { bits<3> i3; bits<5> Zn; bits<3> imm; @@ -1805,9 +1804,9 @@ multiclass sme2_mla_long_array_index op0, bits<2> op, V let Inst{2-0} = imm; } - def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; - def : SME2_ZA_TwoOp_Multi_Index_Pat; + def : SME2_ZA_TwoOp_Multi_Index_Pat; } class sme2_mla_long_array_vg2_index op0, bits<2> op> @@ -1825,14 +1824,14 @@ class sme2_mla_long_array_vg2_index op0, bits<2> op> } multiclass sme2_fp_mla_long_array_vg2_index op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg2_index, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg2_index, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; - def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; + def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, 
VectorIndexH32b_timm:$i3), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } multiclass sme2_int_mla_long_array_vg2_index op, SDPatternOperator intrinsic> { @@ -1861,33 +1860,35 @@ class sme2_mla_long_array_vg4_index op0, bits<2> op> } multiclass sme2_fp_mla_long_array_vg4_index op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; - def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } multiclass sme2_int_mla_long_array_vg4_index op, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; - def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } -class sme2_mla_long_arrayop0, bits<2> op, Operand index_ty, +class sme2_mla_long_arrayop0, bits<2> op, + MatrixOperand matrix_ty, + Operand index_ty, RegisterOperand first_vector_ty, RegisterOperand second_vector_ty, string 
mnemonic, string vg_acronym=""> - : I<(outs MatrixOp32:$ZAda), - (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, + : I<(outs matrix_ty:$ZAda), + (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm, first_vector_ty:$Zn, second_vector_ty:$Zm), mnemonic,"\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm", "", []> , Sched<[]> { @@ -1905,8 +1906,8 @@ class sme2_mla_long_arrayop0, bits<2> op, Operand index_ty, } multiclass sme2_mla_long_array_single op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array , SMEPseudo2Instr{ + def _HtoS : sme2_mla_long_array , SMEPseudo2Instr{ bits<4> Zm; bits<5> Zn; bits<3> imm; @@ -1916,15 +1917,15 @@ multiclass sme2_mla_long_array_single op0, bits<2> op, let Inst{2-0} = imm; } - def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo; - def : SME2_ZA_TwoOp_Multi_Single_Pat; + def : SME2_ZA_TwoOp_Multi_Single_Pat; } -class sme2_mla_long_array_vg24_single op0, bit vg4, bits<2> op, - RegisterOperand first_vector_ty, - string mnemonic, string vg_acronym> - : sme2_mla_long_array op0, bit vg4, bits<2> op, bit o2, + MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, + ZPRRegOp zpr_ty, string mnemonic, string vg_acronym> + : sme2_mla_long_array { bits<4> Zm; bits<5> Zn; @@ -1932,96 +1933,117 @@ class sme2_mla_long_array_vg24_single op0, bit vg4, bits<2> op, let Inst{20} = vg4; let Inst{19-16} = Zm; let Inst{9-5} = Zn; - let Inst{2} = 0b0; + let Inst{2} = o2; let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg2_single op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg24_single<0b00, 0b0, op, ZZ_h, mnemonic, - "vgx2">, SMEPseudo2Instr; + +multiclass sme2_fp_mla_long_array_vg2_single op, MatrixOperand matrix_ty, + RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, + ValueType zpr_ty, SDPatternOperator intrinsic> { + def NAME : 
sme2_mla_long_array_vg24_single<0b00, 0b0, op{2-1}, op{0}, matrix_ty, multi_vector_ty, + vector_ty, mnemonic, "vgx2">, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; - def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; + def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; + (!cast(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, + uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg2_single op, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg24_single<0b01, 0b0, op, ZZ_h, mnemonic, - "vgx2">, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b0, op, 0b0, MatrixOp32, ZZ_h, ZPR4b16, mnemonic, + "vgx2">, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo; - def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; + def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; } -multiclass sme2_fp_mla_long_array_vg4_single op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg24_single<0b00, 0b1, op, ZZZZ_h, mnemonic, - "vgx4">, SMEPseudo2Instr; +multiclass sme2_fp_mla_long_array_vg4_single op, MatrixOperand matrix_ty, + RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, + ValueType zpr_ty, SDPatternOperator intrinsic> { + def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty, + vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; - def : 
SME2_ZA_TwoOp_VG4_Multi_Single_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; + (!cast(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, + uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg4_single op, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg24_single<0b01, 0b1, op, ZZZZ_h, mnemonic, - "vgx4">, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b1, op, 0b0, MatrixOp32, ZZZZ_h, ZPR4b16, mnemonic, + "vgx4">, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo; - def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; } -class sme2_mla_long_array_vg2_multi op0, bits<2> op> - : sme2_mla_long_array { + +class sme2_mla_long_array_vg2_multi op0, bits<3> op, + MatrixOperand matrix_ty, RegisterOperand multi_vector_ty> + : sme2_mla_long_array { bits<4> Zm; bits<4> Zn; bits<2> imm; let Inst{20-17} = Zm; let Inst{16} = 0b0; let Inst{9-6} = Zn; - let Inst{5} = 0b0; + let Inst{5} = op{2}; // fp8 let Inst{2} = 0b0; let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg2_multi op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg2_multi, SMEPseudo2Instr; +multiclass sme2_fp_mla_long_array_vg2_multi op, MatrixOperand matrix_ty, + RegisterOperand multi_vector_ty, + ValueType zpr_ty, SDPatternOperator intrinsic> { + + def NAME : sme2_mla_long_array_vg2_multi, + SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; - def 
: SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; + def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; + (!cast(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, + uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg2_multi op, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg2_multi, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg2_multi, + SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; - def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; + def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; } -class sme2_mla_long_array_vg4_multi op0, bits<2> op> - : sme2_mla_long_array { +class sme2_mla_long_array_vg4_multi op0, bits<3> op, + MatrixOperand matrix_ty, + RegisterOperand multi_vector_ty> + : sme2_mla_long_array { bits<3> Zm; bits<3> Zn; bits<2> imm; @@ -2029,31 +2051,37 @@ class sme2_mla_long_array_vg4_multi op0, bits<2> op> let Inst{17} = 0b0; let Inst{16} = 0b1; let Inst{9-7} = Zn; - let Inst{6-5} = 0b00; + let Inst{6} = 0b0; + let Inst{5} = op{2}; //fp8 let Inst{2} = 0b0; let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg4_multi op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg4_multi, SMEPseudo2Instr; +multiclass sme2_fp_mla_long_array_vg4_multi op, MatrixOperand matrix_ty, + RegisterOperand multi_vector_ty, ValueType zpr_ty, + SDPatternOperator intrinsic> { + def NAME : sme2_mla_long_array_vg4_multi, + SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; 
- def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; + (!cast(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, + uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg4_multi op, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg4_multi, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg4_multi, + SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; - def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; } //===----------------------------------------------------------------------===// @@ -2344,7 +2372,7 @@ multiclass sme2_zip_vector_vg2 { //===----------------------------------------------------------------------===// // SME2 Dot Products and MLA -class sme2_multi_vec_array_vg2_index op, MatrixOperand matrix_ty, +class sme2_multi_vec_array_vg2_index sz, bits<6> op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, Operand index_ty, string mnemonic> @@ -2357,8 +2385,8 @@ class sme2_multi_vec_array_vg2_index op, MatrixOperand matrix_ty bits<2> Rv; bits<4> Zn; bits<3> imm3; - let Inst{31-23} = 0b110000010; - let Inst{22} = sz; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; let Inst{21-20} = 0b01; let Inst{19-16} = Zm; let Inst{15} = 0b0; @@ -2372,11 +2400,11 @@ class sme2_multi_vec_array_vg2_index op, MatrixOperand matrix_ty } // SME2 multi-vec ternary indexed two registers 32-bit -multiclass sme2_multi_vec_array_vg2_index_32b op, 
+multiclass sme2_multi_vec_array_vg2_index_32b sz, bits<4> op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType vt, SDPatternOperator intrinsic> { - def NAME : sme2_multi_vec_array_vg2_index<0b1, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty, + def NAME : sme2_multi_vec_array_vg2_index, SMEPseudo2Instr { bits<2> i; let Inst{11-10} = i; @@ -2392,9 +2420,10 @@ multiclass sme2_multi_vec_array_vg2_index_32b op, } // SME2.1 multi-vec ternary indexed two registers 16-bit -multiclass sme2p1_multi_vec_array_vg2_index_16b op> { - def NAME : sme2_multi_vec_array_vg2_index<0b0, {0b1,?,?,op,?}, MatrixOp16, - ZZ_h_mul_r, ZPR4b16, +multiclass sme2p1_multi_vec_array_vg2_index_16b sz, bits<3> op, + RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> { + def NAME : sme2_multi_vec_array_vg2_index { bits<3> i; let Inst{11-10} = i{2-1}; @@ -2402,7 +2431,7 @@ multiclass sme2p1_multi_vec_array_vg2_index_16b op> { } def : InstAlias(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, - ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>; + multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>; } // SME2 multi-vec ternary indexed two registers 64-bit @@ -2451,7 +2480,7 @@ multiclass sme2_multi_vec_array_vg2_index_64b op, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>; } -class sme2_multi_vec_array_vg4_index op, MatrixOperand matrix_ty, +class sme2_multi_vec_array_vg4_index op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, Operand index_ty, string mnemonic> @@ -2470,10 +2499,9 @@ class sme2_multi_vec_array_vg4_index op, MatrixOperand matrix_ty let Inst{19-16} = Zm; let Inst{15} = 0b1; let Inst{14-13} = Rv; - let Inst{12-10} = op{5-3}; + let Inst{12-10} = op{6-4}; let Inst{9-7} = Zn; - let Inst{6} = 0b0; - let Inst{5-3} = op{2-0}; + let Inst{6-3} = op{3-0}; let Inst{2-0} = imm3; let Constraints = "$ZAda = $_ZAda"; @@ -2484,7 +2512,7 @@ multiclass sme2_multi_vec_array_vg4_index_32b op, 
RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType vt, SDPatternOperator intrinsic> { - def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, + def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,0b0, op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty, VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr { bits<2> i; let Inst{11-10} = i; @@ -2500,9 +2528,11 @@ multiclass sme2_multi_vec_array_vg4_index_32b op, } // SME2.1 multi-vec ternary indexed four registers 16-bit -multiclass sme2p1_multi_vec_array_vg4_index_16b op> { +multiclass sme2p1_multi_vec_array_vg4_index_16b op, + RegisterOperand multi_vector_ty, + ZPRRegOp zpr_ty> { def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16, - ZZZZ_h_mul_r, ZPR4b16, + multi_vector_ty, zpr_ty, VectorIndexH, mnemonic>{ bits<3> i; let Inst{11-10} = i{2-1}; @@ -2511,7 +2541,7 @@ multiclass sme2p1_multi_vec_array_vg4_index_16b op> { def : InstAlias(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, - sme_elm_idx0_7:$imm3, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>; + sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>; } // SME2 multi-vec ternary indexed four registers 64-bit @@ -2561,7 +2591,7 @@ multiclass sme2_multi_vec_array_vg4_index_64b op, } //===----------------------------------------------------------------------===// // SME2 multi-vec indexed long long MLA one source 32-bit -class sme2_mla_ll_array_index_32b op> +class sme2_mla_ll_array_index_32b sz, bits<3> op> : I<(outs MatrixOp32:$ZAda), (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", @@ -2571,7 +2601,9 @@ class sme2_mla_ll_array_index_32b op> bits<4> i; bits<5> Zn; bits<2> imm2; - let Inst{31-20} = 0b110000010000; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-20} = 0b00; let Inst{19-16} = Zm; let Inst{15} = i{3}; let 
Inst{14-13} = Rv; @@ -2583,8 +2615,8 @@ class sme2_mla_ll_array_index_32b op> let Constraints = "$ZAda = $_ZAda"; } -multiclass sme2_mla_ll_array_index_32b op, SDPatternOperator intrinsic> { - def NAME : sme2_mla_ll_array_index_32b, SMEPseudo2Instr; +multiclass sme2_mla_ll_array_index_32b sz, bits<3> op, SDPatternOperator intrinsic> { + def NAME : sme2_mla_ll_array_index_32b, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; @@ -2625,7 +2657,7 @@ multiclass sme2_mla_ll_array_index_64b op, SDPatternOpe def : SME2_ZA_TwoOp_Multi_Index_Pat; } -class sme2_mla_ll_array_vg24_index_32b op, +class sme2_mla_ll_array_vg24_index_32b sz, bit vg4, bits<3> op, RegisterOperand vector_ty, string mnemonic> : I<(outs MatrixOp32:$ZAda), @@ -2637,7 +2669,9 @@ class sme2_mla_ll_array_vg24_index_32b op, bits<2> Rv; bits<4> i; bit imm; - let Inst{31-20} = 0b110000010001; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-20} = 0b01; let Inst{19-16} = Zm; let Inst{15} = vg4; let Inst{14-13} = Rv; @@ -2652,8 +2686,8 @@ class sme2_mla_ll_array_vg24_index_32b op, //SME2 multi-vec indexed long long MLA two sources 32-bit -multiclass sme2_mla_ll_array_vg2_index_32b op, SDPatternOperator intrinsic> { - def NAME: sme2_mla_ll_array_vg24_index_32b<0b0, op, ZZ_b_mul_r, mnemonic>, SMEPseudo2Instr { +multiclass sme2_mla_ll_array_vg2_index_32b sz, bits<3> op, SDPatternOperator intrinsic> { + def NAME: sme2_mla_ll_array_vg24_index_32b, SMEPseudo2Instr { bits<4> Zn; let Inst{9-6} = Zn; } @@ -2668,11 +2702,11 @@ multiclass sme2_mla_ll_array_vg2_index_32b op, SDPatter // SME2 multi-vec indexed long long MLA four sources 32-bit -multiclass sme2_mla_ll_array_vg4_index_32b op, SDPatternOperator intrinsic> { - def NAME: sme2_mla_ll_array_vg24_index_32b<0b1, op, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr { +multiclass sme2_mla_ll_array_vg4_index_32b sz, bits<4> op, SDPatternOperator intrinsic> { + def NAME: sme2_mla_ll_array_vg24_index_32b, SMEPseudo2Instr { bits<3> Zn; let 
Inst{9-7} = Zn; - let Inst{6} = 0b0; + let Inst{6} = op{3}; } def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; @@ -2744,7 +2778,7 @@ multiclass sme2_mla_ll_array_vg4_index_64b op, SDPatter //SME2 multiple and single vector long long FMA one source -class sme2_mla_ll_array_single op, +class sme2_mla_ll_array_single op, MatrixOperand matrix_ty, ZPRRegOp vector_ty, ZPRRegOp zpr_ty> : I<(outs matrix_ty:$ZAda), @@ -2757,8 +2791,9 @@ class sme2_mla_ll_array_single op, bits<5> Zn; bits<2> imm; let Inst{31-23} = 0b110000010; - let Inst{22} = op{3}; //sz - let Inst{21-20} = 0b10; + let Inst{22} = op{4}; //sz + let Inst{21} = 0b1; + let Inst{20} = op{3}; //fp8 let Inst{19-16} = Zm; let Inst{15} = 0b0; let Inst{14-13} = Rv; @@ -2770,7 +2805,7 @@ class sme2_mla_ll_array_single op, let Constraints = "$ZAda = $_ZAda"; } -multiclass sme2_mla_ll_array_single op, +multiclass sme2_mla_ll_array_single op, MatrixOperand matrix_ty, ZPRRegOp vector_ty, ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> { def NAME : sme2_mla_ll_array_single, SMEPseudo2Instr; @@ -2780,29 +2815,28 @@ multiclass sme2_mla_ll_array_single op, def : SME2_ZA_TwoOp_Multi_Single_Pat; } -class sme2_mla_ll_array_vg24_single op, MatrixOperand matrix_ty, +class sme2_mla_ll_array_vg24_single op, MatrixOperand matrix_ty, RegisterOperand vector_ty, ZPRRegOp zpr_ty, string mnemonic> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, zpr_ty:$Zm), - mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{3}, "vgx4", "vgx2") # "], $Zn, $Zm", + mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{4}, "vgx4", "vgx2") # "], $Zn, $Zm", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<5> Zn; bit imm; let Inst{31-23} = 0b110000010; - let Inst{22} = op{4}; //sz + let Inst{22} = op{5}; //sz let Inst{21} = 0b1; - let Inst{20} = op{3}; //vg4 + let Inst{20} = op{4}; //vg4 let Inst{19-16} = Zm; let Inst{15} = 0b0; let Inst{14-13} = Rv; let Inst{12-10} = 0b000; let Inst{9-5} = Zn; - let 
Inst{4-2} = op{2-0}; - let Inst{1} = 0b0; + let Inst{4-1} = op{3-0}; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; @@ -2810,7 +2844,7 @@ class sme2_mla_ll_array_vg24_single op, MatrixOperand matrix_ty, //SME2 single-multi long long MLA two and four sources -multiclass sme2_mla_ll_array_vg24_single op, +multiclass sme2_mla_ll_array_vg24_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> { @@ -2828,7 +2862,7 @@ multiclass sme2_mla_ll_array_vg2_single op, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> { - defm NAME: sme2_mla_ll_array_vg24_single; + defm NAME: sme2_mla_ll_array_vg24_single; def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; } @@ -2837,14 +2871,14 @@ multiclass sme2_mla_ll_array_vg4_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> { - defm NAME: sme2_mla_ll_array_vg24_single; + defm NAME: sme2_mla_ll_array_vg24_single; def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; } // SME2 multiple vectors long long MLA two sources -class sme2_mla_ll_array_vg2_multi op, MatrixOperand matrix_ty, +class sme2_mla_ll_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand vector_ty,string mnemonic> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, @@ -2856,22 +2890,21 @@ class sme2_mla_ll_array_vg2_multi op, MatrixOperand matrix_ty, bits<4> Zn; bit imm; let Inst{31-23} = 0b110000011; - let Inst{22} = op{3}; // sz + let Inst{22} = op{4}; // sz let Inst{21} = 0b1; let Inst{20-17} = Zm; let Inst{16-15} = 0b00; let Inst{14-13} = Rv; let Inst{12-10} = 0b000; let Inst{9-6} = Zn; - let Inst{5} = 0b0; - let Inst{4-2} = op{2-0}; + let Inst{5-2} = op{3-0}; let Inst{1} = 0b0; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; } -multiclass sme2_mla_ll_array_vg2_multi op, +multiclass sme2_mla_ll_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand vector_ty, 
ValueType vt, SDPatternOperator intrinsic> { @@ -2887,7 +2920,7 @@ multiclass sme2_mla_ll_array_vg2_multi op, // SME2 multiple vectors long long MLA four sources -class sme2_mla_ll_array_vg4_multi op,MatrixOperand matrix_ty, +class sme2_mla_ll_array_vg4_multi op,MatrixOperand matrix_ty, RegisterOperand vector_ty, string mnemonic> : I<(outs matrix_ty:$ZAda), @@ -2900,22 +2933,22 @@ class sme2_mla_ll_array_vg4_multi op,MatrixOperand matrix_ty, bits<3> Zn; bit imm; let Inst{31-23} = 0b110000011; - let Inst{22} = op{3}; // sz + let Inst{22} = op{4}; // sz let Inst{21} = 0b1; let Inst{20-18} = Zm; let Inst{17-15} = 0b010; let Inst{14-13} = Rv; let Inst{12-10} = 0b000; let Inst{9-7} = Zn; - let Inst{6-5} = 0b00; - let Inst{4-2} = op{2-0}; + let Inst{6} = 0b0; + let Inst{5-2} = op{3-0}; let Inst{1} = 0b0; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; } -multiclass sme2_mla_ll_array_vg4_multi op, +multiclass sme2_mla_ll_array_vg4_multi op, MatrixOperand matrix_ty, RegisterOperand vector_ty, ValueType vt, SDPatternOperator intrinsic> { @@ -2985,7 +3018,7 @@ class sme2_spill_fill_vector opc> // SME2 move to/from lookup table class sme2_movt_zt_to_scalar opc> : I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3), - mnemonic, "\t$Rt, $ZTt$imm3", + mnemonic, "\t$Rt, $ZTt[$imm3]", "", []>, Sched<[]> { bits<3> imm3; bits<5> Rt; @@ -2997,7 +3030,7 @@ class sme2_movt_zt_to_scalar opc> class sme2_movt_scalar_to_zt opc> : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt), - mnemonic, "\t$ZTt$imm3, $Rt", + mnemonic, "\t$ZTt[$imm3], $Rt", "", []>, Sched<[]> { bits<3> imm3; bits<5> Rt; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index d56540acf7ae5..7bb457d918821 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -8721,8 +8721,8 @@ multiclass sve2_crypto_unary_op { // SVE BFloat16 Group //===----------------------------------------------------------------------===// 
-class sve_float_dot -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm), +class sve_float_dot +: I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, src_ty:$Zn, src_ty:$Zm), asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zda; bits<5> Zn; @@ -8731,7 +8731,8 @@ class sve_float_dot let Inst{22} = bf; let Inst{21} = 0b1; let Inst{20-16} = Zm; - let Inst{15-10} = 0b100000; + let Inst{15-11} = 0b10000; + let Inst{10} = o2; let Inst{9-5} = Zn; let Inst{4-0} = Zda; @@ -8741,24 +8742,24 @@ class sve_float_dot let mayRaiseFPException = 1; } -multiclass sve_float_dot { - def NAME : sve_float_dot; +multiclass sve_float_dot { + def NAME : sve_float_dot; def : SVE_3_Op_Pat(NAME)>; } -class sve_float_dot_indexed -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$iop), +class sve_float_dot_indexed +: I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, src1_ty:$Zn, src2_ty:$Zm, iop_ty:$iop), asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> { bits<5> Zda; bits<5> Zn; bits<3> Zm; - bits<2> iop; let Inst{31-23} = 0b011001000; let Inst{22} = bf; let Inst{21} = 0b1; - let Inst{20-19} = iop; let Inst{18-16} = Zm; - let Inst{15-10} = 0b010000; + let Inst{15-12} = 0b0100; let Inst{9-5} = Zn; let Inst{4-0} = Zda; @@ -8768,8 +8769,14 @@ class sve_float_dot_indexed let mayRaiseFPException = 1; } -multiclass sve_float_dot_indexed { - def NAME : sve_float_dot_indexed; +multiclass sve_float_dot_indexed opc, ZPRRegOp src1_ty, + ZPRRegOp src2_ty, string asm, ValueType InVT, + SDPatternOperator op> { + def NAME : sve_float_dot_indexed { + bits<2> iop; + let Inst{20-19} = iop; + let Inst{11-10} = opc; + } def : SVE_4_Op_Imm_Pat(NAME)>; } From d2aa523f2a2efcffbc0485b3958c0cab772051b3 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 13 Oct 2023 13:30:38 +0000 Subject: [PATCH 078/720] [gn build] Port 2cea1babefbb --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn 
b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index b57a35396293c..0833f4c033d35 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -1046,7 +1046,6 @@ if (current_toolchain == default_toolchain) { "scoped_allocator", "semaphore", "set", - "setjmp.h", "shared_mutex", "span", "sstream", From b22917e6e2a0aec05474f58e64b7e87d1ea0a054 Mon Sep 17 00:00:00 2001 From: XChy Date: Fri, 13 Oct 2023 22:02:57 +0800 Subject: [PATCH 079/720] [InstCombine] Fold Ext(i1) Pred shr(A, BW - 1) => i1 Pred A s< 0 (#68244) Resolves #67916 . This patch folds `Ext(icmp (A, xxx)) Pred shr(A, BW - 1)` into `i1 Pred A s< 0`. [Alive2](https://alive2.llvm.org/ce/z/k53Xwa). --- .../InstCombine/InstCombineCompares.cpp | 56 +++--- llvm/test/Transforms/InstCombine/icmp-shr.ll | 161 +++++++++++++----- .../InstCombine/icmp-xor-signbit.ll | 104 +++++++++++ 3 files changed, 249 insertions(+), 72 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index aa18c7e73ad5f..66e2b6c72cce4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5390,35 +5390,6 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType())); } - // Test if 2 values have different or same signbits: - // (X u>> BitWidth - 1) == zext (Y s> -1) --> (X ^ Y) < 0 - // (X u>> BitWidth - 1) != zext (Y s> -1) --> (X ^ Y) > -1 - // (X s>> BitWidth - 1) == sext (Y s> -1) --> (X ^ Y) < 0 - // (X s>> BitWidth - 1) != sext (Y s> -1) --> (X ^ Y) > -1 - Instruction *ExtI; - if (match(Op1, m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(A)))) && - (Op0->hasOneUse() || Op1->hasOneUse())) { - unsigned OpWidth = Op0->getType()->getScalarSizeInBits(); - Instruction *ShiftI; - Value *X, *Y; - ICmpInst::Predicate Pred2; - if (match(Op0, 
m_CombineAnd(m_Instruction(ShiftI), - m_Shr(m_Value(X), - m_SpecificIntAllowUndef(OpWidth - 1)))) && - match(A, m_ICmp(Pred2, m_Value(Y), m_AllOnes())) && - Pred2 == ICmpInst::ICMP_SGT && X->getType() == Y->getType()) { - unsigned ExtOpc = ExtI->getOpcode(); - unsigned ShiftOpc = ShiftI->getOpcode(); - if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) || - (ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) { - Value *Xor = Builder.CreateXor(X, Y, "xor.signbits"); - Value *R = (Pred == ICmpInst::ICMP_EQ) ? Builder.CreateIsNeg(Xor) - : Builder.CreateIsNotNeg(Xor); - return replaceInstUsesWith(I, R); - } - } - } - // (A >> C) == (B >> C) --> (A^B) u< (1 << C) // For lshr and ashr pairs. const APInt *AP1, *AP2; @@ -7194,6 +7165,33 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this)) return R; } + + Value *X, *Y; + // Signbit test folds + // Fold (X u>> BitWidth - 1 Pred ZExt(i1)) --> X s< 0 Pred i1 + // Fold (X s>> BitWidth - 1 Pred SExt(i1)) --> X s< 0 Pred i1 + Instruction *ExtI; + if ((I.isUnsigned() || I.isEquality()) && + match(Op1, + m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(Y)))) && + Y->getType()->getScalarSizeInBits() == 1 && + (Op0->hasOneUse() || Op1->hasOneUse())) { + unsigned OpWidth = Op0->getType()->getScalarSizeInBits(); + Instruction *ShiftI; + if (match(Op0, m_CombineAnd(m_Instruction(ShiftI), + m_Shr(m_Value(X), m_SpecificIntAllowUndef( + OpWidth - 1))))) { + unsigned ExtOpc = ExtI->getOpcode(); + unsigned ShiftOpc = ShiftI->getOpcode(); + if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) || + (ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) { + Value *SLTZero = + Builder.CreateICmpSLT(X, Constant::getNullValue(X->getType())); + Value *Cmp = Builder.CreateICmp(Pred, SLTZero, Y, I.getName()); + return replaceInstUsesWith(I, Cmp); + } + } + } } if (Instruction *Res = foldICmpEquality(I)) diff --git 
a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll index f4dfa2edfa177..1067897420705 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll @@ -1302,9 +1302,9 @@ define i1 @lshr_neg_sgt_zero(i8 %x) { define i1 @exactly_one_set_signbit(i8 %x, i8 %y) { ; CHECK-LABEL: @exactly_one_set_signbit( -; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[XOR_SIGNBITS]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] ; %xsign = lshr i8 %x, 7 %ypos = icmp sgt i8 %y, -1 @@ -1317,9 +1317,9 @@ define i1 @exactly_one_set_signbit_use1(i8 %x, i8 %y) { ; CHECK-LABEL: @exactly_one_set_signbit_use1( ; CHECK-NEXT: [[XSIGN:%.*]] = lshr i8 [[X:%.*]], 7 ; CHECK-NEXT: call void @use(i8 [[XSIGN]]) -; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[XOR_SIGNBITS]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] ; %xsign = lshr i8 %x, 7 call void @use(i8 %xsign) @@ -1331,9 +1331,9 @@ define i1 @exactly_one_set_signbit_use1(i8 %x, i8 %y) { define <2 x i1> @same_signbit(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @same_signbit( -; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR_SIGNBITS]], -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], +; CHECK-NEXT: ret <2 x i1> [[R1]] ; %xsign = lshr <2 x i8> %x, %ypos = icmp sgt <2 x i8> %y, @@ -1347,9 +1347,9 @@ define i1 @same_signbit_use2(i8 %x, i8 %y) { ; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[YPOSZ:%.*]] = 
zext i1 [[YPOS]] to i8 ; CHECK-NEXT: call void @use(i8 [[YPOSZ]]) -; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[XOR_SIGNBITS]], -1 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: [[R1:%.*]] = icmp sgt i8 [[TMP1]], -1 +; CHECK-NEXT: ret i1 [[R1]] ; %xsign = lshr i8 %x, 7 %ypos = icmp sgt i8 %y, -1 @@ -1382,9 +1382,10 @@ define i1 @same_signbit_use3(i8 %x, i8 %y) { define <2 x i1> @same_signbit_poison_elts(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @same_signbit_poison_elts( -; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR_SIGNBITS]], -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[R1:%.*]] = xor <2 x i1> [[TMP1]], [[YPOS]] +; CHECK-NEXT: ret <2 x i1> [[R1]] ; %xsign = lshr <2 x i8> %x, %ypos = icmp sgt <2 x i8> %y, @@ -1397,11 +1398,10 @@ define <2 x i1> @same_signbit_poison_elts(<2 x i8> %x, <2 x i8> %y) { define i1 @same_signbit_wrong_type(i8 %x, i32 %y) { ; CHECK-LABEL: @same_signbit_wrong_type( -; CHECK-NEXT: [[XSIGN:%.*]] = lshr i8 [[X:%.*]], 7 ; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt i32 [[Y:%.*]], -1 -; CHECK-NEXT: [[YPOSZ:%.*]] = zext i1 [[YPOS]] to i8 -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[XSIGN]], [[YPOSZ]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[R1:%.*]] = xor i1 [[TMP1]], [[YPOS]] +; CHECK-NEXT: ret i1 [[R1]] ; %xsign = lshr i8 %x, 7 %ypos = icmp sgt i32 %y, -1 @@ -1450,11 +1450,9 @@ define i1 @exactly_one_set_signbit_wrong_shr(i8 %x, i8 %y) { define i1 @exactly_one_set_signbit_wrong_pred(i8 %x, i8 %y) { ; CHECK-LABEL: @exactly_one_set_signbit_wrong_pred( -; CHECK-NEXT: [[XSIGN:%.*]] = lshr i8 [[X:%.*]], 7 -; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt i8 [[Y:%.*]], -1 -; CHECK-NEXT: 
[[YPOSZ:%.*]] = zext i1 [[YPOS]] to i8 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[XSIGN]], [[YPOSZ]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[R1:%.*]] = icmp slt i8 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[R1]] ; %xsign = lshr i8 %x, 7 %ypos = icmp sgt i8 %y, -1 @@ -1465,9 +1463,9 @@ define i1 @exactly_one_set_signbit_wrong_pred(i8 %x, i8 %y) { define i1 @exactly_one_set_signbit_signed(i8 %x, i8 %y) { ; CHECK-LABEL: @exactly_one_set_signbit_signed( -; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[XOR_SIGNBITS]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] ; %xsign = ashr i8 %x, 7 %ypos = icmp sgt i8 %y, -1 @@ -1480,9 +1478,9 @@ define i1 @exactly_one_set_signbit_use1_signed(i8 %x, i8 %y) { ; CHECK-LABEL: @exactly_one_set_signbit_use1_signed( ; CHECK-NEXT: [[XSIGN:%.*]] = ashr i8 [[X:%.*]], 7 ; CHECK-NEXT: call void @use(i8 [[XSIGN]]) -; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[XOR_SIGNBITS]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] ; %xsign = ashr i8 %x, 7 call void @use(i8 %xsign) @@ -1494,9 +1492,9 @@ define i1 @exactly_one_set_signbit_use1_signed(i8 %x, i8 %y) { define <2 x i1> @same_signbit_signed(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @same_signbit_signed( -; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR_SIGNBITS]], -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], +; CHECK-NEXT: ret <2 x i1> [[R1]] ; %xsign = ashr <2 x i8> %x, %ypos = icmp sgt <2 x i8> %y, @@ 
-1510,9 +1508,9 @@ define i1 @same_signbit_use2_signed(i8 %x, i8 %y) { ; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[YPOSZ:%.*]] = sext i1 [[YPOS]] to i8 ; CHECK-NEXT: call void @use(i8 [[YPOSZ]]) -; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[XOR_SIGNBITS]], -1 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: [[R1:%.*]] = icmp sgt i8 [[TMP1]], -1 +; CHECK-NEXT: ret i1 [[R1]] ; %xsign = ashr i8 %x, 7 %ypos = icmp sgt i8 %y, -1 @@ -1545,9 +1543,10 @@ define i1 @same_signbit_use3_signed(i8 %x, i8 %y) { define <2 x i1> @same_signbit_poison_elts_signed(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @same_signbit_poison_elts_signed( -; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR_SIGNBITS]], -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[R1:%.*]] = xor <2 x i1> [[TMP1]], [[YPOS]] +; CHECK-NEXT: ret <2 x i1> [[R1]] ; %xsign = ashr <2 x i8> %x, %ypos = icmp sgt <2 x i8> %y, @@ -1560,11 +1559,10 @@ define <2 x i1> @same_signbit_poison_elts_signed(<2 x i8> %x, <2 x i8> %y) { define i1 @same_signbit_wrong_type_signed(i8 %x, i32 %y) { ; CHECK-LABEL: @same_signbit_wrong_type_signed( -; CHECK-NEXT: [[XSIGN:%.*]] = ashr i8 [[X:%.*]], 7 ; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt i32 [[Y:%.*]], -1 -; CHECK-NEXT: [[YPOSZ:%.*]] = sext i1 [[YPOS]] to i8 -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[XSIGN]], [[YPOSZ]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[R1:%.*]] = xor i1 [[TMP1]], [[YPOS]] +; CHECK-NEXT: ret i1 [[R1]] ; %xsign = ashr i8 %x, 7 %ypos = icmp sgt i32 %y, -1 @@ -1589,3 +1587,80 @@ define i1 @exactly_one_set_signbit_wrong_shamt_signed(i8 %x, i8 %y) { %r = icmp eq i8 %xsign, %yposz ret i1 %r 
} + +define i1 @slt_zero_ult_i1(i32 %a, i1 %b) { +; CHECK-LABEL: @slt_zero_ult_i1( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[B:%.*]], true +; CHECK-NEXT: [[CMP21:%.*]] = and i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret i1 [[CMP21]] +; + %conv = zext i1 %b to i32 + %cmp1 = lshr i32 %a, 31 + %cmp2 = icmp ult i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define i1 @slt_zero_ult_i1_fail1(i32 %a, i1 %b) { +; CHECK-LABEL: @slt_zero_ult_i1_fail1( +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[B:%.*]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = lshr i32 [[A:%.*]], 30 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[CMP1]], [[CONV]] +; CHECK-NEXT: ret i1 [[CMP2]] +; + %conv = zext i1 %b to i32 + %cmp1 = lshr i32 %a, 30 + %cmp2 = icmp ult i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define i1 @slt_zero_ult_i1_fail2(i32 %a, i1 %b) { +; CHECK-LABEL: @slt_zero_ult_i1_fail2( +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[B:%.*]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = ashr i32 [[A:%.*]], 31 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[CMP1]], [[CONV]] +; CHECK-NEXT: ret i1 [[CMP2]] +; + %conv = zext i1 %b to i32 + %cmp1 = ashr i32 %a, 31 + %cmp2 = icmp ult i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define i1 @slt_zero_slt_i1_fail(i32 %a, i1 %b) { +; CHECK-LABEL: @slt_zero_slt_i1_fail( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[B:%.*]], true +; CHECK-NEXT: [[CMP21:%.*]] = and i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret i1 [[CMP21]] +; + %conv = zext i1 %b to i32 + %cmp1 = lshr i32 %a, 31 + %cmp2 = icmp slt i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define i1 @slt_zero_eq_i1_signed(i32 %a, i1 %b) { +; CHECK-LABEL: @slt_zero_eq_i1_signed( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP21:%.*]] = xor i1 [[TMP1]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP21]] +; + %conv = sext i1 %b to i32 + %cmp1 = ashr i32 %a, 31 + %cmp2 = icmp eq i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define i1 @slt_zero_eq_i1_fail_signed(i32 
%a, i1 %b) { +; CHECK-LABEL: @slt_zero_eq_i1_fail_signed( +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[B:%.*]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = lshr i32 [[A:%.*]], 31 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[CMP1]], [[CONV]] +; CHECK-NEXT: ret i1 [[CMP2]] +; + %conv = sext i1 %b to i32 + %cmp1 = lshr i32 %a, 31 + %cmp2 = icmp eq i32 %conv, %cmp1 + ret i1 %cmp2 +} diff --git a/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll b/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll index 29a18ebbdd94e..d08dca225328f 100644 --- a/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll +++ b/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll @@ -217,3 +217,107 @@ define <2 x i1> @negative_simplify_splat(<4 x i8> %x) { ret <2 x i1> %c } +define i1 @slt_zero_eq_i1(i32 %a, i1 %b) { +; CHECK-LABEL: @slt_zero_eq_i1( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP21:%.*]] = xor i1 [[TMP1]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP21]] +; + %conv = zext i1 %b to i32 + %cmp1 = lshr i32 %a, 31 + %cmp2 = icmp eq i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define i1 @slt_zero_eq_i1_fail(i32 %a, i1 %b) { +; CHECK-LABEL: @slt_zero_eq_i1_fail( +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[B:%.*]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = ashr i32 [[A:%.*]], 31 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[CMP1]], [[CONV]] +; CHECK-NEXT: ret i1 [[CMP2]] +; + %conv = zext i1 %b to i32 + %cmp1 = ashr i32 %a, 31 + %cmp2 = icmp eq i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define i1 @slt_zero_eq_ne_0(i32 %a) { +; CHECK-LABEL: @slt_zero_eq_ne_0( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = lshr i32 %a, 31 + %cmp2 = icmp eq i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define i1 @slt_zero_ne_ne_0(i32 %a) { +; CHECK-LABEL: @slt_zero_ne_ne_0( +; CHECK-NEXT: [[CMP21:%.*]] = icmp sgt i32 [[A:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP21]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp 
to i32 + %cmp1 = lshr i32 %a, 31 + %cmp2 = icmp ne i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define <4 x i1> @slt_zero_eq_ne_0_vec(<4 x i32> %a) { +; CHECK-LABEL: @slt_zero_eq_ne_0_vec( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[A:%.*]], +; CHECK-NEXT: ret <4 x i1> [[TMP1]] +; + %cmp = icmp ne <4 x i32> %a, zeroinitializer + %conv = zext <4 x i1> %cmp to <4 x i32> + %cmp1 = lshr <4 x i32> %a, + %cmp2 = icmp eq <4 x i32> %conv, %cmp1 + ret <4 x i1> %cmp2 +} + +define i1 @slt_zero_ne_ne_b(i32 %a, i32 %b) { +; CHECK-LABEL: @slt_zero_ne_ne_b( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A]], 0 +; CHECK-NEXT: [[CMP21:%.*]] = xor i1 [[TMP1]], [[CMP]] +; CHECK-NEXT: ret i1 [[CMP21]] +; + %cmp = icmp ne i32 %a, %b + %conv = zext i1 %cmp to i32 + %cmp1 = lshr i32 %a, 31 + %cmp2 = icmp ne i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define i1 @slt_zero_eq_ne_0_fail1(i32 %a) { +; CHECK-LABEL: @slt_zero_eq_ne_0_fail1( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = ashr i32 [[A]], 31 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[CMP1]], [[CONV]] +; CHECK-NEXT: ret i1 [[CMP2]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = ashr i32 %a, 31 + %cmp2 = icmp eq i32 %conv, %cmp1 + ret i1 %cmp2 +} + +define i1 @slt_zero_eq_ne_0_fail2(i32 %a) { +; CHECK-LABEL: @slt_zero_eq_ne_0_fail2( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = lshr i32 [[A]], 30 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[CMP1]], [[CONV]] +; CHECK-NEXT: ret i1 [[CMP2]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = lshr i32 %a, 30 + %cmp2 = icmp eq i32 %conv, %cmp1 + ret i1 %cmp2 +} From ba79fb2e1ff7130cde02fbbd325f0f96f8a522ca Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 12 Oct 2023 20:13:08 -0700 Subject: [PATCH 080/720] 
[libc++] Re-apply "Remove UB in list, forward_list and __hash_table" This patch removes undefined behavior in list and forward_list and __hash_table caused by improperly beginning and ending the lifetime of the various node classes. It allows removing the _LIBCPP_STANDALONE_DEBUG macro from these node types since we now properly begin and end their lifetime, meaning that we won't trip up constructor homing. See https://reviews.llvm.org/D98750 for more information on what prompted this patch. This commit re-applies 0687e4d9f310, which had been reverted in b935882bdce7 because it broke the LLDB build. LLDB folks tell me I can go ahead and re-commit this now. Differential Revision: https://reviews.llvm.org/D101206 Co-authored-by: Amy Kwan --- libcxx/include/__hash_table | 119 +++++++++++++++++++++++------------ libcxx/include/__node_handle | 6 +- libcxx/include/__tree | 2 + libcxx/include/ext/hash_map | 8 +-- libcxx/include/forward_list | 73 ++++++++++++++------- libcxx/include/list | 68 +++++++++++++++----- libcxx/include/unordered_map | 12 ++-- libcxx/include/unordered_set | 4 +- 8 files changed, 200 insertions(+), 92 deletions(-) diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 98337abe55833..1732c82178568 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -21,6 +21,7 @@ #include <__memory/addressof.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> +#include <__memory/construct_at.h> #include <__memory/pointer_traits.h> #include <__memory/swap_allocator.h> #include <__memory/unique_ptr.h> @@ -45,6 +46,7 @@ #include #include #include +#include // __launder #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -107,19 +109,44 @@ struct __hash_node_base } _LIBCPP_INLINE_VISIBILITY __hash_node_base() _NOEXCEPT : __next_(nullptr) {} + _LIBCPP_HIDE_FROM_ABI explicit __hash_node_base(__next_pointer __next) _NOEXCEPT : __next_(__next) {} }; template -struct 
_LIBCPP_STANDALONE_DEBUG __hash_node +struct __hash_node : public __hash_node_base < __rebind_pointer_t<_VoidPtr, __hash_node<_Tp, _VoidPtr> > > { typedef _Tp __node_value_type; + using _Base = __hash_node_base<__rebind_pointer_t<_VoidPtr, __hash_node<_Tp, _VoidPtr> > >; + using __next_pointer = typename _Base::__next_pointer; size_t __hash_; - __node_value_type __value_; + + // We allow starting the lifetime of nodes without initializing the value held by the node, + // since that is handled by the hash table itself in order to be allocator-aware. +#ifndef _LIBCPP_CXX03_LANG +private: + union { + _Tp __value_; + }; + +public: + _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { return __value_; } +#else +private: + _ALIGNAS_TYPE(_Tp) char __buffer_[sizeof(_Tp)]; + +public: + _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { + return *std::__launder(reinterpret_cast<_Tp*>(&__buffer_)); + } +#endif + + _LIBCPP_HIDE_FROM_ABI explicit __hash_node(__next_pointer __next, size_t __hash) : _Base(__next), __hash_(__hash) {} + _LIBCPP_HIDE_FROM_ABI ~__hash_node() {} }; inline _LIBCPP_INLINE_VISIBILITY @@ -311,12 +338,12 @@ public: _LIBCPP_INLINE_VISIBILITY reference operator*() const { - return __node_->__upcast()->__value_; + return __node_->__upcast()->__get_value(); } _LIBCPP_INLINE_VISIBILITY pointer operator->() const { - return pointer_traits::pointer_to(__node_->__upcast()->__value_); + return pointer_traits::pointer_to(__node_->__upcast()->__get_value()); } _LIBCPP_INLINE_VISIBILITY @@ -387,11 +414,11 @@ public: _LIBCPP_INLINE_VISIBILITY reference operator*() const { - return __node_->__upcast()->__value_; + return __node_->__upcast()->__get_value(); } _LIBCPP_INLINE_VISIBILITY pointer operator->() const { - return pointer_traits::pointer_to(__node_->__upcast()->__value_); + return pointer_traits::pointer_to(__node_->__upcast()->__get_value()); } _LIBCPP_INLINE_VISIBILITY @@ -453,12 +480,12 @@ public: _LIBCPP_INLINE_VISIBILITY reference operator*() const { - return 
__node_->__upcast()->__value_; + return __node_->__upcast()->__get_value(); } _LIBCPP_INLINE_VISIBILITY pointer operator->() const { - return pointer_traits::pointer_to(__node_->__upcast()->__value_); + return pointer_traits::pointer_to(__node_->__upcast()->__get_value()); } _LIBCPP_INLINE_VISIBILITY @@ -543,12 +570,12 @@ public: _LIBCPP_INLINE_VISIBILITY reference operator*() const { - return __node_->__upcast()->__value_; + return __node_->__upcast()->__get_value(); } _LIBCPP_INLINE_VISIBILITY pointer operator->() const { - return pointer_traits::pointer_to(__node_->__upcast()->__value_); + return pointer_traits::pointer_to(__node_->__upcast()->__get_value()); } _LIBCPP_INLINE_VISIBILITY @@ -670,8 +697,10 @@ public: _LIBCPP_INLINE_VISIBILITY void operator()(pointer __p) _NOEXCEPT { - if (__value_constructed) - __alloc_traits::destroy(__na_, _NodeTypes::__get_ptr(__p->__value_)); + if (__value_constructed) { + __alloc_traits::destroy(__na_, _NodeTypes::__get_ptr(__p->__get_value())); + std::__destroy_at(std::addressof(*__p)); + } if (__p) __alloc_traits::deallocate(__na_, __p, 1); } @@ -1365,7 +1394,8 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__deallocate_node(__next_pointer __np) { __next_pointer __next = __np->__next_; __node_pointer __real_np = __np->__upcast(); - __node_traits::destroy(__na, _NodeTypes::__get_ptr(__real_np->__value_)); + __node_traits::destroy(__na, _NodeTypes::__get_ptr(__real_np->__get_value())); + std::__destroy_at(std::addressof(*__real_np)); __node_traits::deallocate(__na, __real_np, 1); __np = __next; } @@ -1434,8 +1464,8 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign( const_iterator __i = __u.begin(); while (__cache != nullptr && __u.size() != 0) { - __cache->__upcast()->__value_ = - _VSTD::move(__u.remove(__i++)->__value_); + __cache->__upcast()->__get_value() = + _VSTD::move(__u.remove(__i++)->__get_value()); __next_pointer __next = __cache->__next_; __node_insert_multi(__cache->__upcast()); __cache = __next; @@ -1453,7 
+1483,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign( const_iterator __i = __u.begin(); while (__u.size() != 0) { - __node_holder __h = __construct_node(_NodeTypes::__move(__u.remove(__i++)->__value_)); + __node_holder __h = __construct_node(_NodeTypes::__move(__u.remove(__i++)->__get_value())); __node_insert_multi(__h.get()); __h.release(); } @@ -1495,7 +1525,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_unique(_InputIterator __first #endif // _LIBCPP_HAS_NO_EXCEPTIONS for (; __cache != nullptr && __first != __last; ++__first) { - __cache->__upcast()->__value_ = *__first; + __cache->__upcast()->__get_value() = *__first; __next_pointer __next = __cache->__next_; __node_insert_unique(__cache->__upcast()); __cache = __next; @@ -1535,7 +1565,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __first, #endif // _LIBCPP_HAS_NO_EXCEPTIONS for (; __cache != nullptr && __first != __last; ++__first) { - __cache->__upcast()->__value_ = *__first; + __cache->__upcast()->__get_value() = *__first; __next_pointer __next = __cache->__next_; __node_insert_multi(__cache->__upcast()); __cache = __next; @@ -1629,7 +1659,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_prepare( __ndptr = __ndptr->__next_) { if ((__ndptr->__hash() == __hash) && - key_eq()(__ndptr->__upcast()->__value_, __value)) + key_eq()(__ndptr->__upcast()->__get_value(), __value)) return __ndptr; } } @@ -1678,9 +1708,9 @@ template pair::iterator, bool> __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique(__node_pointer __nd) { - __nd->__hash_ = hash_function()(__nd->__value_); + __nd->__hash_ = hash_function()(__nd->__get_value()); __next_pointer __existing_node = - __node_insert_unique_prepare(__nd->__hash(), __nd->__value_); + __node_insert_unique_prepare(__nd->__hash(), __nd->__get_value()); // Insert the node, unless it already exists in the container. 
bool __inserted = false; @@ -1726,7 +1756,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare( // false true set __found to true // true false break if (__found != (__pn->__next_->__hash() == __cp_hash && - key_eq()(__pn->__next_->__upcast()->__value_, __cp_val))) + key_eq()(__pn->__next_->__upcast()->__get_value(), __cp_val))) { if (!__found) __found = true; @@ -1780,8 +1810,8 @@ template typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi(__node_pointer __cp) { - __cp->__hash_ = hash_function()(__cp->__value_); - __next_pointer __pn = __node_insert_multi_prepare(__cp->__hash(), __cp->__value_); + __cp->__hash_ = hash_function()(__cp->__get_value()); + __next_pointer __pn = __node_insert_multi_prepare(__cp->__hash(), __cp->__get_value()); __node_insert_multi_perform(__cp, __pn); return iterator(__cp->__ptr()); @@ -1792,7 +1822,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( const_iterator __p, __node_pointer __cp) { - if (__p != end() && key_eq()(*__p, __cp->__value_)) + if (__p != end() && key_eq()(*__p, __cp->__get_value())) { __next_pointer __np = __p.__node_; __cp->__hash_ = __np->__hash(); @@ -1839,7 +1869,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const& __nd = __nd->__next_) { if ((__nd->__hash() == __hash) && - key_eq()(__nd->__upcast()->__value_, __k)) + key_eq()(__nd->__upcast()->__get_value(), __k)) goto __done; } } @@ -1983,9 +2013,9 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_merge_unique( __it != __source.end();) { __node_pointer __src_ptr = __it.__node_->__upcast(); - size_t __hash = hash_function()(__src_ptr->__value_); + size_t __hash = hash_function()(__src_ptr->__get_value()); __next_pointer __existing_node = - __node_insert_unique_prepare(__hash, __src_ptr->__value_); + __node_insert_unique_prepare(__hash, 
__src_ptr->__get_value()); auto __prev_iter = __it++; if (__existing_node == nullptr) { @@ -2037,9 +2067,9 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_merge_multi( __it != __source.end();) { __node_pointer __src_ptr = __it.__node_->__upcast(); - size_t __src_hash = hash_function()(__src_ptr->__value_); + size_t __src_hash = hash_function()(__src_ptr->__get_value()); __next_pointer __pn = - __node_insert_multi_prepare(__src_hash, __src_ptr->__value_); + __node_insert_multi_prepare(__src_hash, __src_ptr->__get_value()); (void)__source.remove(__it++).release(); __src_ptr->__hash_ = __src_hash; __node_insert_multi_perform(__src_ptr, __pn); @@ -2113,8 +2143,8 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) if _LIBCPP_CONSTEXPR_SINCE_CXX17 (!_UniqueKeys) { for (; __np->__next_ != nullptr && - key_eq()(__cp->__upcast()->__value_, - __np->__next_->__upcast()->__value_); + key_eq()(__cp->__upcast()->__get_value(), + __np->__next_->__upcast()->__get_value()); __np = __np->__next_) ; } @@ -2148,7 +2178,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) __nd = __nd->__next_) { if ((__nd->__hash() == __hash) - && key_eq()(__nd->__upcast()->__value_, __k)) + && key_eq()(__nd->__upcast()->__get_value(), __k)) return iterator(__nd); } } @@ -2175,7 +2205,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) const __nd = __nd->__next_) { if ((__nd->__hash() == __hash) - && key_eq()(__nd->__upcast()->__value_, __k)) + && key_eq()(__nd->__upcast()->__get_value(), __k)) return const_iterator(__nd); } } @@ -2193,10 +2223,20 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node(_Args&& ...__args) "Construct cannot be called with a hash value type"); __node_allocator& __na = __node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, _NodeTypes::__get_ptr(__h->__value_), _VSTD::forward<_Args>(__args)...); + + // Begin the lifetime of the node itself. 
Note that this doesn't begin the lifetime of the value + // held inside the node, since we need to use the allocator's construct() method for that. + // + // We don't use the allocator's construct() method to construct the node itself since the + // Cpp17FooInsertable named requirements don't require the allocator's construct() method + // to work on anything other than the value_type. + std::__construct_at(std::addressof(*__h), /* next = */nullptr, /* hash = */0); + + // Now construct the value_type using the allocator's construct() method. + __node_traits::construct(__na, _NodeTypes::__get_ptr(__h->__get_value()), _VSTD::forward<_Args>(__args)...); __h.get_deleter().__value_constructed = true; - __h->__hash_ = hash_function()(__h->__value_); - __h->__next_ = nullptr; + + __h->__hash_ = hash_function()(__h->__get_value()); return __h; } @@ -2210,12 +2250,11 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node_hash( "Construct cannot be called with a hash value type"); __node_allocator& __na = __node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, _NodeTypes::__get_ptr(__h->__value_), + std::__construct_at(std::addressof(*__h), /* next = */nullptr, /* hash = */__hash); + __node_traits::construct(__na, _NodeTypes::__get_ptr(__h->__get_value()), _VSTD::forward<_First>(__f), _VSTD::forward<_Rest>(__rest)...); __h.get_deleter().__value_constructed = true; - __h->__hash_ = __hash; - __h->__next_ = nullptr; return __h; } diff --git a/libcxx/include/__node_handle b/libcxx/include/__node_handle index cc4eaf73c0bbe..b3cc3619dd5ad 100644 --- a/libcxx/include/__node_handle +++ b/libcxx/include/__node_handle @@ -209,7 +209,7 @@ struct __set_node_handle_specifics _LIBCPP_INLINE_VISIBILITY value_type& value() const { - return static_cast<_Derived const*>(this)->__ptr_->__value_; + return static_cast<_Derived const*>(this)->__ptr_->__get_value(); } }; @@ -223,14 +223,14 @@ struct __map_node_handle_specifics 
key_type& key() const { return static_cast<_Derived const*>(this)-> - __ptr_->__value_.__ref().first; + __ptr_->__get_value().__ref().first; } _LIBCPP_INLINE_VISIBILITY mapped_type& mapped() const { return static_cast<_Derived const*>(this)-> - __ptr_->__value_.__ref().second; + __ptr_->__get_value().__ref().second; } }; diff --git a/libcxx/include/__tree b/libcxx/include/__tree index 54ce71e442d03..eccadea8a0139 100644 --- a/libcxx/include/__tree +++ b/libcxx/include/__tree @@ -774,6 +774,8 @@ public: __node_value_type __value_; + _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { return __value_; } + private: ~__tree_node() = delete; __tree_node(__tree_node const&) = delete; diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map index 116b6a72f2c12..de963675eb793 100644 --- a/libcxx/include/ext/hash_map +++ b/libcxx/include/ext/hash_map @@ -357,9 +357,9 @@ public: void operator()(pointer __p) { if (__second_constructed) - __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.second)); + __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__get_value().second)); if (__first_constructed) - __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.first)); + __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__get_value().first)); if (__p) __alloc_traits::deallocate(__na_, __p, 1); } @@ -667,9 +667,9 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::__construct_node(const key_type& __k) { __node_allocator& __na = __table_.__node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, _VSTD::addressof(__h->__value_.first), __k); + __node_traits::construct(__na, _VSTD::addressof(__h->__get_value().first), __k); __h.get_deleter().__first_constructed = true; - __node_traits::construct(__na, _VSTD::addressof(__h->__value_.second)); + __node_traits::construct(__na, _VSTD::addressof(__h->__get_value().second)); __h.get_deleter().__second_constructed = true; return __h; } diff --git 
a/libcxx/include/forward_list b/libcxx/include/forward_list index 75ac685cc0283..09338ab695713 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -211,6 +211,7 @@ template #include <__memory/allocator.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> +#include <__memory/construct_at.h> #include <__memory/pointer_traits.h> #include <__memory/swap_allocator.h> #include <__memory_resource/polymorphic_allocator.h> @@ -230,6 +231,7 @@ template #include <__utility/forward.h> #include <__utility/move.h> #include +#include // __launder #include // standard-mandated includes @@ -318,17 +320,35 @@ template using __begin_node_of = __forward_begin_node<__rebind_pointer_t<_VoidPtr, __forward_list_node<_Tp, _VoidPtr> > >; template -struct _LIBCPP_STANDALONE_DEBUG __forward_list_node +struct __forward_list_node : public __begin_node_of<_Tp, _VoidPtr> { typedef _Tp value_type; typedef __begin_node_of<_Tp, _VoidPtr> _Base; typedef typename _Base::pointer _NodePtr; - value_type __value_; + // We allow starting the lifetime of nodes without initializing the value held by the node, + // since that is handled by the list itself in order to be allocator-aware. 
+#ifndef _LIBCPP_CXX03_LANG +private: + union { + _Tp __value_; + }; + +public: + _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { return __value_; } +#else +private: + _ALIGNAS_TYPE(_Tp) char __buffer_[sizeof(_Tp)]; + +public: + _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { + return *std::__launder(reinterpret_cast<_Tp*>(&__buffer_)); + } +#endif - _LIBCPP_HIDE_FROM_ABI __forward_list_node() = default; - _LIBCPP_HIDE_FROM_ABI __forward_list_node(const value_type& __v, _NodePtr __next) : _Base(__next), __value_(__v) {} + _LIBCPP_HIDE_FROM_ABI explicit __forward_list_node(_NodePtr __next) : _Base(__next) {} + _LIBCPP_HIDE_FROM_ABI ~__forward_list_node() {} }; @@ -383,10 +403,10 @@ public: __forward_list_iterator() _NOEXCEPT : __ptr_(nullptr) {} _LIBCPP_INLINE_VISIBILITY - reference operator*() const {return __get_unsafe_node_pointer()->__value_;} + reference operator*() const {return __get_unsafe_node_pointer()->__get_value();} _LIBCPP_INLINE_VISIBILITY pointer operator->() const { - return pointer_traits::pointer_to(__get_unsafe_node_pointer()->__value_); + return pointer_traits::pointer_to(__get_unsafe_node_pointer()->__get_value()); } _LIBCPP_INLINE_VISIBILITY @@ -468,10 +488,10 @@ public: : __ptr_(__p.__ptr_) {} _LIBCPP_INLINE_VISIBILITY - reference operator*() const {return __get_unsafe_node_pointer()->__value_;} + reference operator*() const {return __get_unsafe_node_pointer()->__get_value();} _LIBCPP_INLINE_VISIBILITY pointer operator->() const {return pointer_traits::pointer_to( - __get_unsafe_node_pointer()->__value_);} + __get_unsafe_node_pointer()->__get_value());} _LIBCPP_INLINE_VISIBILITY __forward_list_const_iterator& operator++() @@ -577,15 +597,26 @@ protected: _LIBCPP_HIDE_FROM_ABI __node_pointer __create_node(__node_pointer __next, _Args&& ...__args) { __node_allocator& __a = __alloc(); __allocation_guard<__node_allocator> __guard(__a, 1); - __guard.__get()->__next_ = __next; - __node_traits::construct(__a, std::addressof(__guard.__get()->__value_), 
std::forward<_Args>(__args)...); + // Begin the lifetime of the node itself. Note that this doesn't begin the lifetime of the value + // held inside the node, since we need to use the allocator's construct() method for that. + // + // We don't use the allocator's construct() method to construct the node itself since the + // Cpp17FooInsertable named requirements don't require the allocator's construct() method + // to work on anything other than the value_type. + std::__construct_at(std::addressof(*__guard.__get()), __next); + + // Now construct the value_type using the allocator's construct() method. + __node_traits::construct(__a, std::addressof(__guard.__get()->__get_value()), std::forward<_Args>(__args)...); return __guard.__release_ptr(); } template _LIBCPP_HIDE_FROM_ABI void __delete_node(__node_pointer __node) { + // For the same reason as above, we use the allocator's destroy() method for the value_type, + // but not for the node itself. __node_allocator& __a = __alloc(); - __node_traits::destroy(__a, std::addressof(__node->__value_)); + __node_traits::destroy(__a, std::addressof(__node->__get_value())); + std::__destroy_at(std::addressof(*__node)); __node_traits::deallocate(__a, __node, 1); } @@ -847,9 +878,9 @@ public: } _LIBCPP_INLINE_VISIBILITY - reference front() {return base::__before_begin()->__next_->__value_;} + reference front() {return base::__before_begin()->__next_->__get_value();} _LIBCPP_INLINE_VISIBILITY - const_reference front() const {return base::__before_begin()->__next_->__value_;} + const_reference front() const {return base::__before_begin()->__next_->__get_value();} #ifndef _LIBCPP_CXX03_LANG #if _LIBCPP_STD_VER >= 17 @@ -1227,7 +1258,7 @@ forward_list<_Tp, _Alloc>::emplace_front(_Args&&... 
__args) { base::__before_begin()->__next_ = this->__create_node(/* next = */base::__before_begin()->__next_, std::forward<_Args>(__args)...); #if _LIBCPP_STD_VER >= 17 - return base::__before_begin()->__next_->__value_; + return base::__before_begin()->__next_->__get_value(); #endif } @@ -1556,7 +1587,7 @@ forward_list<_Tp, _Alloc>::remove(const value_type& __v) const iterator __e = end(); for (iterator __i = before_begin(); __i.__get_begin()->__next_ != nullptr;) { - if (__i.__get_begin()->__next_->__value_ == __v) + if (__i.__get_begin()->__next_->__get_value() == __v) { ++__count_removed; iterator __j = _VSTD::next(__i, 2); @@ -1584,7 +1615,7 @@ forward_list<_Tp, _Alloc>::remove_if(_Predicate __pred) const iterator __e = end(); for (iterator __i = before_begin(); __i.__get_begin()->__next_ != nullptr;) { - if (__pred(__i.__get_begin()->__next_->__value_)) + if (__pred(__i.__get_begin()->__next_->__get_value())) { ++__count_removed; iterator __j = _VSTD::next(__i, 2); @@ -1647,11 +1678,11 @@ forward_list<_Tp, _Alloc>::__merge(__node_pointer __f1, __node_pointer __f2, if (__f2 == nullptr) return __f1; __node_pointer __r; - if (__comp(__f2->__value_, __f1->__value_)) + if (__comp(__f2->__get_value(), __f1->__get_value())) { __node_pointer __t = __f2; while (__t->__next_ != nullptr && - __comp(__t->__next_->__value_, __f1->__value_)) + __comp(__t->__next_->__get_value(), __f1->__get_value())) __t = __t->__next_; __r = __f2; __f2 = __t->__next_; @@ -1663,11 +1694,11 @@ forward_list<_Tp, _Alloc>::__merge(__node_pointer __f1, __node_pointer __f2, __f1 = __f1->__next_; while (__f1 != nullptr && __f2 != nullptr) { - if (__comp(__f2->__value_, __f1->__value_)) + if (__comp(__f2->__get_value(), __f1->__get_value())) { __node_pointer __t = __f2; while (__t->__next_ != nullptr && - __comp(__t->__next_->__value_, __f1->__value_)) + __comp(__t->__next_->__get_value(), __f1->__get_value())) __t = __t->__next_; __p->__next_ = __f2; __f2 = __t->__next_; @@ -1703,7 +1734,7 @@ 
forward_list<_Tp, _Alloc>::__sort(__node_pointer __f1, difference_type __sz, case 1: return __f1; case 2: - if (__comp(__f1->__next_->__value_, __f1->__value_)) + if (__comp(__f1->__next_->__get_value(), __f1->__get_value())) { __node_pointer __t = __f1->__next_; __t->__next_ = __f1; diff --git a/libcxx/include/list b/libcxx/include/list index b02599bc3fe7c..e5b524b8835a1 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -217,6 +217,7 @@ template #include <__memory/allocator.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> +#include <__memory/construct_at.h> #include <__memory/pointer_traits.h> #include <__memory/swap_allocator.h> #include <__memory_resource/polymorphic_allocator.h> @@ -237,6 +238,7 @@ template #include <__utility/swap.h> #include #include +#include // __launder #include // standard-mandated includes @@ -308,6 +310,9 @@ struct __list_node_base __list_node_base() : __prev_(_NodeTraits::__unsafe_link_pointer_cast(__self())), __next_(_NodeTraits::__unsafe_link_pointer_cast(__self())) {} + _LIBCPP_HIDE_FROM_ABI explicit __list_node_base(__link_pointer __prev, __link_pointer __next) + : __prev_(__prev), __next_(__next) {} + _LIBCPP_INLINE_VISIBILITY __base_pointer __self() { return pointer_traits<__base_pointer>::pointer_to(*this); @@ -320,14 +325,35 @@ struct __list_node_base }; template -struct _LIBCPP_STANDALONE_DEBUG __list_node +struct __list_node : public __list_node_base<_Tp, _VoidPtr> { - _Tp __value_; + // We allow starting the lifetime of nodes without initializing the value held by the node, + // since that is handled by the list itself in order to be allocator-aware. 
+#ifndef _LIBCPP_CXX03_LANG +private: + union { + _Tp __value_; + }; + +public: + _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { return __value_; } +#else +private: + _ALIGNAS_TYPE(_Tp) char __buffer_[sizeof(_Tp)]; + +public: + _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { + return *std::__launder(reinterpret_cast<_Tp*>(&__buffer_)); + } +#endif typedef __list_node_base<_Tp, _VoidPtr> __base; typedef typename __base::__link_pointer __link_pointer; + _LIBCPP_HIDE_FROM_ABI explicit __list_node(__link_pointer __prev, __link_pointer __next) : __base(__prev, __next) {} + _LIBCPP_HIDE_FROM_ABI ~__list_node() {} + _LIBCPP_INLINE_VISIBILITY __link_pointer __as_link() { return static_cast<__link_pointer>(__base::__self()); @@ -370,12 +396,12 @@ public: _LIBCPP_INLINE_VISIBILITY reference operator*() const { - return __ptr_->__as_node()->__value_; + return __ptr_->__as_node()->__get_value(); } _LIBCPP_INLINE_VISIBILITY pointer operator->() const { - return pointer_traits::pointer_to(__ptr_->__as_node()->__value_); + return pointer_traits::pointer_to(__ptr_->__as_node()->__get_value()); } _LIBCPP_INLINE_VISIBILITY @@ -442,12 +468,12 @@ public: _LIBCPP_INLINE_VISIBILITY reference operator*() const { - return __ptr_->__as_node()->__value_; + return __ptr_->__as_node()->__get_value(); } _LIBCPP_INLINE_VISIBILITY pointer operator->() const { - return pointer_traits::pointer_to(__ptr_->__as_node()->__value_); + return pointer_traits::pointer_to(__ptr_->__as_node()->__get_value()); } _LIBCPP_INLINE_VISIBILITY @@ -600,16 +626,26 @@ protected: _LIBCPP_HIDE_FROM_ABI __node_pointer __create_node(__link_pointer __prev, __link_pointer __next, _Args&& ...__args) { __node_allocator& __alloc = __node_alloc(); __allocation_guard<__node_allocator> __guard(__alloc, 1); - __guard.__get()->__prev_ = __prev; - __guard.__get()->__next_ = __next; - __node_alloc_traits::construct(__alloc, std::addressof(__guard.__get()->__value_), std::forward<_Args>(__args)...); + // Begin the lifetime of the node itself. 
Note that this doesn't begin the lifetime of the value + // held inside the node, since we need to use the allocator's construct() method for that. + // + // We don't use the allocator's construct() method to construct the node itself since the + // Cpp17FooInsertable named requirements don't require the allocator's construct() method + // to work on anything other than the value_type. + std::__construct_at(std::addressof(*__guard.__get()), __prev, __next); + + // Now construct the value_type using the allocator's construct() method. + __node_alloc_traits::construct(__alloc, std::addressof(__guard.__get()->__get_value()), std::forward<_Args>(__args)...); return __guard.__release_ptr(); } template _LIBCPP_HIDE_FROM_ABI void __delete_node(__node_pointer __node) { + // For the same reason as above, we use the allocator's destroy() method for the value_type, + // but not for the node itself. __node_allocator& __alloc = __node_alloc(); - __node_alloc_traits::destroy(__alloc, std::addressof(__node->__value_)); + __node_alloc_traits::destroy(__alloc, std::addressof(__node->__get_value())); + std::__destroy_at(std::addressof(*__node)); __node_alloc_traits::deallocate(__alloc, __node, 1); } @@ -894,25 +930,25 @@ public: reference front() { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::front called on empty list"); - return base::__end_.__next_->__as_node()->__value_; + return base::__end_.__next_->__as_node()->__get_value(); } _LIBCPP_INLINE_VISIBILITY const_reference front() const { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::front called on empty list"); - return base::__end_.__next_->__as_node()->__value_; + return base::__end_.__next_->__as_node()->__get_value(); } _LIBCPP_INLINE_VISIBILITY reference back() { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::back called on empty list"); - return base::__end_.__prev_->__as_node()->__value_; + return base::__end_.__prev_->__as_node()->__get_value(); } _LIBCPP_INLINE_VISIBILITY const_reference back() 
const { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::back called on empty list"); - return base::__end_.__prev_->__as_node()->__value_; + return base::__end_.__prev_->__as_node()->__get_value(); } #ifndef _LIBCPP_CXX03_LANG @@ -1502,7 +1538,7 @@ list<_Tp, _Alloc>::emplace_front(_Args&&... __args) __link_nodes_at_front(__nl, __nl); ++base::__sz(); #if _LIBCPP_STD_VER >= 17 - return __node->__value_; + return __node->__get_value(); #endif } @@ -1520,7 +1556,7 @@ list<_Tp, _Alloc>::emplace_back(_Args&&... __args) __link_nodes_at_back(__nl, __nl); ++base::__sz(); #if _LIBCPP_STD_VER >= 17 - return __node->__value_; + return __node->__get_value(); #endif } diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index 8d83063bbeaeb..e5c58feee55d4 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -874,9 +874,9 @@ public: void operator()(pointer __p) _NOEXCEPT { if (__second_constructed) - __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.__get_value().second)); + __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__get_value().__get_value().second)); if (__first_constructed) - __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__value_.__get_value().first)); + __alloc_traits::destroy(__na_, _VSTD::addressof(__p->__get_value().__get_value().first)); if (__p) __alloc_traits::deallocate(__na_, __p, 1); } @@ -1828,7 +1828,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( iterator __i = __u.begin(); while (__u.size() != 0) { __table_.__emplace_unique( - __u.__table_.remove((__i++).__i_)->__value_.__move()); + __u.__table_.remove((__i++).__i_)->__get_value().__move()); } } } @@ -1920,9 +1920,9 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::__construct_node_with_key(const { __node_allocator& __na = __table_.__node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, _VSTD::addressof(__h->__value_.__get_value().first), __k); + 
__node_traits::construct(__na, _VSTD::addressof(__h->__get_value().__get_value().first), __k); __h.get_deleter().__first_constructed = true; - __node_traits::construct(__na, _VSTD::addressof(__h->__value_.__get_value().second)); + __node_traits::construct(__na, _VSTD::addressof(__h->__get_value().__get_value().second)); __h.get_deleter().__second_constructed = true; return __h; } @@ -2653,7 +2653,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( while (__u.size() != 0) { __table_.__insert_multi( - __u.__table_.remove((__i++).__i_)->__value_.__move()); + __u.__table_.remove((__i++).__i_)->__get_value().__move()); } } } diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 5e47f12446ff9..f1b4104df4f68 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -1150,7 +1150,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( { iterator __i = __u.begin(); while (__u.size() != 0) - __table_.__insert_unique(_VSTD::move(__u.__table_.remove(__i++)->__value_)); + __table_.__insert_unique(_VSTD::move(__u.__table_.remove(__i++)->__get_value())); } } @@ -1835,7 +1835,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( { iterator __i = __u.begin(); while (__u.size() != 0) - __table_.__insert_multi(_VSTD::move(__u.__table_.remove(__i++)->__value_)); + __table_.__insert_multi(_VSTD::move(__u.__table_.remove(__i++)->__get_value())); } } From 74c5e474043daa7900686d0a210b8e03cebf9472 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 13 Oct 2023 16:26:51 +0100 Subject: [PATCH 081/720] [lldb][test] Temporarily disable TestQueueFromStdModule.py (#68970) Started failing since D101206, but root-cause is unclear. It's definitely not an issue with th libc++ patch itself however. So disable the test until we know what's going on. 
---
 .../import-std-module/queue/TestQueueFromStdModule.py          | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lldb/test/API/commands/expression/import-std-module/queue/TestQueueFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/queue/TestQueueFromStdModule.py
index 84e8e3cfb86d6..b08a53855e1db 100644
--- a/lldb/test/API/commands/expression/import-std-module/queue/TestQueueFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/queue/TestQueueFromStdModule.py
@@ -10,6 +10,11 @@ class TestQueue(TestBase):
 
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
+    @skipIf(
+        compiler="clang",
+        compiler_version=[">", "16.0"],
+        bugnumber="https://github.com/llvm/llvm-project/issues/68968",
+    )
     def test(self):
         self.build()
 
From 2c9ddfc7852ed88dd88bb38e9518404a623c70b5 Mon Sep 17 00:00:00 2001
From: fabrizio-indirli
Date: Fri, 13 Oct 2023 16:42:39 +0100
Subject: [PATCH 082/720] [mlir][Tosa] fix fp16/bf16 support for AvgPool2d
 (#68718)

Currently, the AvgPool2d operation in the TOSA MLIR dialect does not
accept half-precision Fp16 and Bf16 tensors, contrary to what is stated
in the [TOSA specification](https://www.mlplatform.org/tosa/tosa_spec.html#_avg_pool2d).
This issue was previously raised here on GitHub as #63424, and it is due
to a bug in the AvgPool2d verifier.

This patch fixes the AvgPool2d verifier to accept fp16 & bf16 datatype
for input/output tensors and accumulator, and it adds related LIT test
cases in Tosa/ops.mlir.
--- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 18 ++++++++++-------- mlir/test/Dialect/Tosa/ops.mlir | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index a719171b2b359..6db04fe38bcd3 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -247,18 +247,20 @@ LogicalResult tosa::AvgPool2dOp::verify() { if (llvm::isa(inputETy) && !accType.isInteger(32)) return emitOpError("accumulator type for integer tensor is not i32"); - if ((inputETy.isBF16() || inputETy.isF16()) && - !(accType.isF16() || accType.isF32())) - return emitOpError("accumulator type for f16/bf16 tensor is not f16/f32"); + if (inputETy.isF16() && !(accType.isF16() || accType.isF32())) + return emitOpError("accumulator type for f16 tensor is not f16/f32"); + + if (inputETy.isBF16() && !accType.isF32()) + return emitOpError("accumulator type for bf16 tensor is not f32"); if (inputETy.isF32() && !accType.isF32()) return emitOpError("accumulator type for f32 tensor is not f32"); - if (inputETy.isF32() && resultETy.isF32()) - return success(); - if (inputETy.isInteger(8) && resultETy.isInteger(8)) - return success(); - if (inputETy.isInteger(16) && resultETy.isInteger(16)) + if ((inputETy.isF32() && resultETy.isF32()) || + (inputETy.isF16() && resultETy.isF16()) || + (inputETy.isBF16() && resultETy.isBF16()) || + (inputETy.isInteger(8) && resultETy.isInteger(8)) || + (inputETy.isInteger(16) && resultETy.isInteger(16))) return success(); return emitOpError("input/output element types are incompatible."); diff --git a/mlir/test/Dialect/Tosa/ops.mlir b/mlir/test/Dialect/Tosa/ops.mlir index 7d7f2d31a4244..e62bea515d06b 100644 --- a/mlir/test/Dialect/Tosa/ops.mlir +++ b/mlir/test/Dialect/Tosa/ops.mlir @@ -16,6 +16,20 @@ func.func @test_avg_pool2d_f32(%arg0: tensor<1x7x7x9xf32>) -> tensor<1x7x7x9xf32 return %0 : tensor<1x7x7x9xf32> } +// ----- +// CHECK-LABEL: 
avg_pool2d_f16 +func.func @test_avg_pool2d_f16(%arg0: tensor<1x7x7x9xf16>) -> tensor<1x7x7x9xf16> { + %0 = tosa.avg_pool2d %arg0 {acc_type = f16, kernel = array, pad = array, stride = array} : (tensor<1x7x7x9xf16>) -> tensor<1x7x7x9xf16> + return %0 : tensor<1x7x7x9xf16> +} + +// ----- +// CHECK-LABEL: avg_pool2d_f16_accumf32 +func.func @test_avg_pool2d_f16_accumf32(%arg0: tensor<1x7x7x9xf16>) -> tensor<1x7x7x9xf16> { + %0 = tosa.avg_pool2d %arg0 {acc_type = f32, kernel = array, pad = array, stride = array} : (tensor<1x7x7x9xf16>) -> tensor<1x7x7x9xf16> + return %0 : tensor<1x7x7x9xf16> +} + // ----- // CHECK-LABEL: avg_pool2d_i8 func.func @test_avg_pool2d_i8(%arg0: tensor<1x7x7x9xi8>) -> tensor<1x7x7x9xi8> { From 7493d45408c3469568ff4b23ae71c435384a830d Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 13 Oct 2023 16:44:11 +0100 Subject: [PATCH 083/720] [lldb][DataFormatter] unordered_map: account for new libc++ __hash_node layout (#68574) Since D101206 (`ba79fb2e1ff7130cde02fbbd325f0f96f8a522ca`) the `__hash_node::__value_` member is wrapped in an anonymous union. `ValueObject::GetChildMemberWithName` doesn't see through the union. This patch accounts for this possible new layout by getting a handle to the union before doing the by-name `__value_` lookup. 
--- .../Language/CPlusPlus/LibCxxUnorderedMap.cpp | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp index 14776cdf80815..2e8da396a4a7b 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp @@ -162,10 +162,27 @@ lldb::ValueObjectSP lldb_private::formatters:: if (!node_sp || error.Fail()) return nullptr; - value_sp = node_sp->GetChildMemberWithName("__value_"); hash_sp = node_sp->GetChildMemberWithName("__hash_"); - if (!value_sp || !hash_sp) + if (!hash_sp) return nullptr; + + value_sp = node_sp->GetChildMemberWithName("__value_"); + if (!value_sp) { + // clang-format off + // Since D101206 (ba79fb2e1f), libc++ wraps the `__value_` in an + // anonymous union. + // Child 0: __hash_node_base base class + // Child 1: __hash_ + // Child 2: anonymous union + // clang-format on + auto anon_union_sp = node_sp->GetChildAtIndex(2); + if (!anon_union_sp) + return nullptr; + + value_sp = anon_union_sp->GetChildMemberWithName("__value_"); + if (!value_sp) + return nullptr; + } } m_elements_cache.push_back( {value_sp.get(), hash_sp->GetValueAsUnsigned(0)}); From 160e8eb4496104a1d0ed77649af7e8bb679252f9 Mon Sep 17 00:00:00 2001 From: nicole mazzuca Date: Fri, 13 Oct 2023 08:47:23 -0700 Subject: [PATCH 084/720] [ASan] Recognize lea r10, [rip + XX] (#68910) This instruction is present in memcpy in the latest vcruntime This PR has been opened for @AndrewDeanMS (a teammate inside Microsoft) who made the PR to our internal branch. 
Co-authored-by: Andrew Dean
---
 compiler-rt/lib/interception/interception_win.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp
index d57afa3fda7bc..1b681ada37b17 100644
--- a/compiler-rt/lib/interception/interception_win.cpp
+++ b/compiler-rt/lib/interception/interception_win.cpp
@@ -624,7 +624,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     // mov rax, QWORD PTR [rip + XXXXXXXX]
     case 0x25ff48:  // 48 ff 25 XX XX XX XX :
                     // rex.W jmp QWORD PTR [rip + XXXXXXXX]
-
+    case 0x158D4C:  // 4c 8d 15 XX XX XX XX : lea r10, [rip + XX]
       // Instructions having offset relative to 'rip' need offset adjustment.
       if (rel_offset)
         *rel_offset = 3;

From 20f39bf48218515e05126d02f26cec73ac655b0f Mon Sep 17 00:00:00 2001
From: Eric
Date: Fri, 13 Oct 2023 11:56:24 -0400
Subject: [PATCH 085/720] Lower std::string's alignment requirement from 16 to
 8. (#68807)

This allows smaller allocations to occur, closer to the actual
std::string's required size. This is particularly effective in
decreasing the allocation size upon initial construction (where
__recommend is called to determine the size).

Although the memory savings per-string are never more than 8 bytes per
string initially, this quickly adds up, and has led to not
insignificant memory savings at Google.

Unfortunately, this change is ABI breaking because it changes the value
returned by max_size. So it has to be guarded.
---
 libcxx/docs/ReleaseNotes/18.rst               |  7 +++
 libcxx/include/__config                       |  5 +++
 libcxx/include/string                         |  9 +++-
 .../string.capacity/allocation_size.pass.cpp  | 45 +++++++++++++++++++
 .../string.capacity/max_size.pass.cpp         |  8 +++-
 5 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 libcxx/test/libcxx/strings/basic.string/string.capacity/allocation_size.pass.cpp

diff --git a/libcxx/docs/ReleaseNotes/18.rst b/libcxx/docs/ReleaseNotes/18.rst
index 5f43d2f2afe22..ac78563aa7384 100644
--- a/libcxx/docs/ReleaseNotes/18.rst
+++ b/libcxx/docs/ReleaseNotes/18.rst
@@ -133,6 +133,13 @@ ABI Affecting Changes
   results in an ABI break, however in practice we expect uses of ``std::projected`` in ABI-sensitive places to be
   extremely rare. Any error resulting from this change should result in a link-time error.
 
+- Under the unstable ABI, the internal alignment requirements for heap allocations
+  inside ``std::string`` have decreased from 16 to 8. This saves memory since string requests fewer additional
+  bytes than it did previously. However, this also changes the return value of ``std::string::max_size``
+  and can cause code compiled against older libc++ versions but linked at runtime to a new version
+  to throw a different exception when attempting allocations that are too large
+  (``std::bad_alloc`` vs ``std::length_error``).
+
 Build System Changes
 --------------------
 
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 55d9f1c737652..65ce6d6a27f83 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -167,6 +167,11 @@
 // The implementation moved to the header, but we still export the symbols from
 // the dylib for backwards compatibility.
 #  define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10
+// Save memory by providing the allocator more freedom to allocate the most
+// efficient size class by dropping the alignment requirements for std::string's
+// pointer from 16 to 8. 
This changes the output of std::string::max_size, +// which makes it ABI breaking +# define _LIBCPP_ABI_STRING_8_BYTE_ALIGNMENT # elif _LIBCPP_ABI_VERSION == 1 # if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF)) // Enable compiling copies of now inline methods into the dylib to support diff --git a/libcxx/include/string b/libcxx/include/string index 33e87406a1156..3078715e02b35 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -1851,7 +1851,14 @@ private: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __align_it(size_type __s) _NOEXCEPT {return (__s + (__a-1)) & ~(__a-1);} - enum {__alignment = 16}; + enum { + __alignment = +#ifdef _LIBCPP_ABI_STRING_8_BYTE_ALIGNMENT + 8 +#else + 16 +#endif + }; static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __recommend(size_type __s) _NOEXCEPT { diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/allocation_size.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/allocation_size.pass.cpp new file mode 100644 index 0000000000000..c7df56c815a80 --- /dev/null +++ b/libcxx/test/libcxx/strings/basic.string/string.capacity/allocation_size.pass.cpp @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// This test demonstrates the smaller allocation sizes when the alignment +// requirements of std::string are dropped from 16 to 8. 
+#include +#include +#include +#include + +#include "test_macros.h" + +// alignment of the string heap buffer is hardcoded to either 16 or 8 + +const std::size_t alignment = +#ifdef _LIBCPP_ABI_STRING_8_BYTE_ALIGNMENT + 8; +#else + 16; +#endif + +int main(int, char**) { + std::string input_string; + input_string.resize(64, 'a'); + + // Call a constructor which selects its size using __recommend. + std::string test_string(input_string.data()); + const std::size_t expected_align8_size = 71; + + // Demonstrate the lesser capacity/allocation size when the alignment requirement is 8. + if (alignment == 8) { + assert(test_string.capacity() == expected_align8_size); + } else { + assert(test_string.capacity() == expected_align8_size + 8); + } + + return 0; +} diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp index 5af9cab0be4e8..a3cb79522f2e1 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp @@ -18,7 +18,13 @@ #include "test_macros.h" // alignment of the string heap buffer is hardcoded to 16 -static const std::size_t alignment = 16; + +static const std::size_t alignment = +#ifdef _LIBCPP_ABI_STRING_8_BYTE_ALIGNMENT + 8; +#else + 16; +#endif template TEST_CONSTEXPR_CXX20 void full_size() { From 05bde3cc23b05a8ee4a77d00e6c4bea2ac44647b Mon Sep 17 00:00:00 2001 From: David Spickett Date: Fri, 13 Oct 2023 16:57:42 +0100 Subject: [PATCH 086/720] [llvm][TableGen][Jupyter] Link to tutorial notebook from README --- llvm/utils/TableGen/jupyter/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/TableGen/jupyter/README.md b/llvm/utils/TableGen/jupyter/README.md index 4356a907878fc..79c986a3fc66f 100644 --- a/llvm/utils/TableGen/jupyter/README.md +++ b/llvm/utils/TableGen/jupyter/README.md @@ -7,6 +7,8 @@ TableGen. 
[LLVM_TableGen.ipynb](LLVM_TableGen.ipynb) - A demo of the kernel's capabilities. +[tablegen_tutorial_part_1.ipynb](tablegen_tutorial_part_1.ipynb) - A tutorial on the TableGen language. + [sql_query_backend.ipynb](sql_query_backend.ipynb) - How to write a backend using JSON output and Python. From 3d75c7c11b5a9ccb66e16df65a37f981ae6f0083 Mon Sep 17 00:00:00 2001 From: spupyrev Date: Fri, 13 Oct 2023 09:35:56 -0700 Subject: [PATCH 087/720] [CodeLayout] Fixing initialization of empty ranges (#68917) Fixing libc++'s consistency checks, by eliminating ranges of singular iterators. --- llvm/lib/Transforms/Utils/CodeLayout.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp index dea91dcac21ae..620b52b69c31d 100644 --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -476,13 +476,16 @@ void ChainT::mergeEdges(ChainT *Other) { } using NodeIter = std::vector::const_iterator; +static std::vector EmptyList; /// A wrapper around three concatenated vectors (chains) of nodes; it is used /// to avoid extra instantiation of the vectors. 
struct MergedNodesT { - MergedNodesT(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(), - NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(), - NodeIter End3 = NodeIter()) + MergedNodesT(NodeIter Begin1, NodeIter End1, + NodeIter Begin2 = EmptyList.begin(), + NodeIter End2 = EmptyList.end(), + NodeIter Begin3 = EmptyList.begin(), + NodeIter End3 = EmptyList.end()) : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3), End3(End3) {} From bbecd422a9bf5423109a754ba3417451946027a7 Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Fri, 13 Oct 2023 09:41:53 -0700 Subject: [PATCH 088/720] [mlir][sparse] cleanup sparse tensor materialization parameter setup (#68956) --- .../Transforms/SparseTensorConversion.cpp | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index ce3b49915319c..a76f81410aa87 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -209,27 +209,12 @@ class NewCallParams final { genMapBuffers(builder, loc, stt, dimSizesValues, params[kParamDimSizes], params[kParamDim2Lvl], params[kParamLvl2Dim]); // Secondary and primary types encoding. - setTemplateTypes(stt); - // Finally, make note that initialization is complete. - assert(isInitialized() && "Initialization failed"); - // And return `this` for method chaining. - return *this; - } - - /// (Re)sets the C++ template type parameters, and returns `this` - /// for method chaining. This is already done as part of `genBuffers`, - /// but is factored out so that it can also be called independently - /// whenever subsequent `genNewCall` calls want to reuse the same - /// buffers but different type parameters. 
- // - // TODO: This is only ever used by sparse2sparse-viaCOO `ConvertOp`; - // is there a better way to handle that than this one-off setter method? - NewCallParams &setTemplateTypes(SparseTensorType stt) { const auto enc = stt.getEncoding(); params[kParamPosTp] = constantPosTypeEncoding(builder, loc, enc); params[kParamCrdTp] = constantCrdTypeEncoding(builder, loc, enc); params[kParamValTp] = constantPrimaryTypeEncoding(builder, loc, stt.getElementType()); + // Return `this` for method chaining. return *this; } From 8e2bd05c4e86834a318ef2279e271f0769be4988 Mon Sep 17 00:00:00 2001 From: Pete Lawrence Date: Fri, 13 Oct 2023 07:06:50 -1000 Subject: [PATCH 089/720] [lldb] Fix `po` alias by printing fix-its to the console. (#68755) The `po` alias now matches the behavior of the `expression` command when the it can apply a Fix-It to an expression. Modifications - Add has `m_fixed_expression` to the `CommandObjectDWIMPrint` class a `protected` member that stores the post Fix-It expression, just like the `CommandObjectExpression` class. - Converted messages to present tense. 
- Add test cases that confirm a Fix-It for a C++ expression for both
  `po` and `expressions`

rdar://115317419
---
 .../Commands/CommandObjectDWIMPrint.cpp       | 15 ++++++-
 .../Commands/CommandObjectExpression.cpp      |  8 ++--
 .../commands/expression/fixits/TestFixIts.py  |  8 +++-
 lldb/test/API/lang/cpp/fixits/Makefile        |  3 ++
 .../test/API/lang/cpp/fixits/TestCppFixIts.py | 44 +++++++++++++++++++
 lldb/test/API/lang/cpp/fixits/main.cpp        |  5 +++
 6 files changed, 75 insertions(+), 8 deletions(-)
 create mode 100644 lldb/test/API/lang/cpp/fixits/Makefile
 create mode 100644 lldb/test/API/lang/cpp/fixits/TestCppFixIts.py
 create mode 100644 lldb/test/API/lang/cpp/fixits/main.cpp

diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp
index 7b168eab9e02d..bdc17c9cffc77 100644
--- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp
+++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp
@@ -172,8 +172,19 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command,
   {
     auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope();
     ValueObjectSP valobj_sp;
-    ExpressionResults expr_result =
-        target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options);
+    std::string fixed_expression;
+
+    ExpressionResults expr_result = target.EvaluateExpression(
+        expr, exe_scope, valobj_sp, eval_options, &fixed_expression);
+
+    // Only mention Fix-Its if the expression evaluator applied them.
+    // Compiler errors refer to the final expression after applying Fix-It(s).
+ if (!fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream << " Evaluated this expression after applying Fix-It(s):\n"; + error_stream << " " << fixed_expression << "\n"; + } + if (expr_result == eExpressionCompleted) { if (verbosity != eDWIMPrintVerbosityNone) { StringRef flags; diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index e7e6e3820b991..2834be660abaf 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -439,11 +439,11 @@ bool CommandObjectExpression::EvaluateExpression(llvm::StringRef expr, ExpressionResults success = target.EvaluateExpression( expr, frame, result_valobj_sp, eval_options, &m_fixed_expression); - // We only tell you about the FixIt if we applied it. The compiler errors - // will suggest the FixIt if it parsed. + // Only mention Fix-Its if the expression evaluator applied them. + // Compiler errors refer to the final expression after applying Fix-It(s). 
if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { - error_stream.Printf(" Fix-it applied, fixed expression was: \n %s\n", - m_fixed_expression.c_str()); + error_stream << " Evaluated this expression after applying Fix-It(s):\n"; + error_stream << " " << m_fixed_expression << "\n"; } if (result_valobj_sp) { diff --git a/lldb/test/API/commands/expression/fixits/TestFixIts.py b/lldb/test/API/commands/expression/fixits/TestFixIts.py index 3bdeb84b4e797..38b242838c828 100644 --- a/lldb/test/API/commands/expression/fixits/TestFixIts.py +++ b/lldb/test/API/commands/expression/fixits/TestFixIts.py @@ -22,7 +22,9 @@ def test_with_dummy_target(self): self.assertEqual( result, lldb.eReturnStatusSuccessFinishResult, ret_val.GetError() ) - self.assertIn("Fix-it applied", ret_val.GetError()) + self.assertIn( + "Evaluated this expression after applying Fix-It(s):", ret_val.GetError() + ) def test_with_target(self): """Test calling expressions with errors that can be fixed by the FixIts.""" @@ -99,7 +101,9 @@ def test_with_target_error_applies_fixit(self): ) self.assertEqual(result, lldb.eReturnStatusFailed, ret_val.GetError()) - self.assertIn("Fix-it applied, fixed expression was:", ret_val.GetError()) + self.assertIn( + "Evaluated this expression after applying Fix-It(s):", ret_val.GetError() + ) self.assertIn("null_pointer->first", ret_val.GetError()) # The final function call runs into SIGILL on aarch64-linux. 
diff --git a/lldb/test/API/lang/cpp/fixits/Makefile b/lldb/test/API/lang/cpp/fixits/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/lang/cpp/fixits/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/fixits/TestCppFixIts.py b/lldb/test/API/lang/cpp/fixits/TestCppFixIts.py new file mode 100644 index 0000000000000..34b52f796da28 --- /dev/null +++ b/lldb/test/API/lang/cpp/fixits/TestCppFixIts.py @@ -0,0 +1,44 @@ +""" +Tests a C++ fixit for the `expr` command and +`po` alias (aka DWIM aka "do what I mean") alias. +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestCase(TestBase): + def test_fixit_with_dwim(self): + """Confirms `po` shows an expression after applying Fix-It(s).""" + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "dwim-print -O -- class C { int i; void f() { []() { ++i; }(); } }; 42", + error=True, + substrs=[ + "Evaluated this expression after applying Fix-It(s)", + "class C { int i; void f() { [this]() { ++i; }(); } }", + ], + ) + + def test_fixit_with_expression(self): + """Confirms `expression` shows an expression after applying Fix-It(s).""" + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "expr class C { int i; void f() { []() { ++i; }(); } }; 42", + error=True, + substrs=[ + "Evaluated this expression after applying Fix-It(s)", + "class C { int i; void f() { [this]() { ++i; }(); } }", + ], + ) diff --git a/lldb/test/API/lang/cpp/fixits/main.cpp b/lldb/test/API/lang/cpp/fixits/main.cpp new file mode 100644 index 0000000000000..e9cf11d18a656 --- /dev/null +++ b/lldb/test/API/lang/cpp/fixits/main.cpp @@ -0,0 +1,5 @@ +int main() { + long foo = 1234; + + return 0; // break here +} From 
72307960bf4676a15d2404d638403533aee347d0 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 13 Oct 2023 10:48:25 -0700 Subject: [PATCH 090/720] [mlir] Fix distinct attr mismatch error reporting (#68938) Previously the error reported location would not be where expected. E.g., it would fail in the existing test if it wasn't the last in the file. --- mlir/lib/AsmParser/AttributeParser.cpp | 3 ++- mlir/test/IR/invalid-builtin-attributes.mlir | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/mlir/lib/AsmParser/AttributeParser.cpp b/mlir/lib/AsmParser/AttributeParser.cpp index 8366c18760fd6..d085fb6af6bc1 100644 --- a/mlir/lib/AsmParser/AttributeParser.cpp +++ b/mlir/lib/AsmParser/AttributeParser.cpp @@ -1225,6 +1225,7 @@ Attribute Parser::parseStridedLayoutAttr() { /// `[` integer-literal `]<` attribute-value `>` /// Attribute Parser::parseDistinctAttr(Type type) { + SMLoc loc = getToken().getLoc(); consumeToken(Token::kw_distinct); if (parseToken(Token::l_square, "expected '[' after 'distinct'")) return {}; @@ -1269,7 +1270,7 @@ Attribute Parser::parseDistinctAttr(Type type) { DistinctAttr distinctAttr = DistinctAttr::create(referencedAttr); it = distinctAttrs.try_emplace(*value, distinctAttr).first; } else if (it->getSecond().getReferencedAttr() != referencedAttr) { - emitError("referenced attribute does not match previous definition: ") + emitError(loc, "referenced attribute does not match previous definition: ") << it->getSecond().getReferencedAttr(); return {}; } diff --git a/mlir/test/IR/invalid-builtin-attributes.mlir b/mlir/test/IR/invalid-builtin-attributes.mlir index 1ff44605cb7ec..431c7b12b8f5f 100644 --- a/mlir/test/IR/invalid-builtin-attributes.mlir +++ b/mlir/test/IR/invalid-builtin-attributes.mlir @@ -587,3 +587,5 @@ func.func @duplicate_dictionary_attr_key() { #attr = distinct[0]<42 : i32> // expected-error@below {{referenced attribute does not match previous definition: 42 : i32}} #attr1 = distinct[0]<43 : i32> + +// ----- From 
158c0529901ec683a41bafafeb7f14de74999517 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Fri, 13 Oct 2023 13:54:03 -0400 Subject: [PATCH 091/720] [LLDB][NFC] Remove dead code (#68927) I found some type/typesystem code that is dead and some of it seems to have been replaced by the ValueObjectPrinter. --- lldb/include/lldb/Symbol/CompilerType.h | 12 - lldb/include/lldb/Symbol/Type.h | 9 - lldb/include/lldb/Symbol/TypeSystem.h | 20 +- .../TypeSystem/Clang/TypeSystemClang.cpp | 419 ------------------ .../TypeSystem/Clang/TypeSystemClang.h | 14 +- lldb/source/Symbol/CompilerType.cpp | 31 -- lldb/source/Symbol/Type.cpp | 42 -- 7 files changed, 2 insertions(+), 545 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h index 2d7092d2c93f8..414db18e52ed7 100644 --- a/lldb/include/lldb/Symbol/CompilerType.h +++ b/lldb/include/lldb/Symbol/CompilerType.h @@ -146,8 +146,6 @@ class CompilerType { bool IsConst() const; - bool IsCStringType(uint32_t &length) const; - bool IsDefined() const; bool IsFloatingPointType(uint32_t &count, bool &is_complex) const; @@ -437,21 +435,11 @@ class CompilerType { LLVM_DUMP_METHOD void dump() const; #endif - void DumpValue(ExecutionContext *exe_ctx, Stream *s, lldb::Format format, - const DataExtractor &data, lldb::offset_t data_offset, - size_t data_byte_size, uint32_t bitfield_bit_size, - uint32_t bitfield_bit_offset, bool show_types, - bool show_summary, bool verbose, uint32_t depth); - bool DumpTypeValue(Stream *s, lldb::Format format, const DataExtractor &data, lldb::offset_t data_offset, size_t data_byte_size, uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, ExecutionContextScope *exe_scope); - void DumpSummary(ExecutionContext *exe_ctx, Stream *s, - const DataExtractor &data, lldb::offset_t data_offset, - size_t data_byte_size); - /// Dump to stdout. 
void DumpTypeDescription(lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) const; diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h index 046501931d211..d7bccae5f4135 100644 --- a/lldb/include/lldb/Symbol/Type.h +++ b/lldb/include/lldb/Symbol/Type.h @@ -148,15 +148,6 @@ class Type : public std::enable_shared_from_this, public UserID { ConstString GetQualifiedName(); - void DumpValue(ExecutionContext *exe_ctx, Stream *s, - const DataExtractor &data, uint32_t data_offset, - bool show_type, bool show_summary, bool verbose, - lldb::Format format = lldb::eFormatDefault); - - bool DumpValueInMemory(ExecutionContext *exe_ctx, Stream *s, - lldb::addr_t address, AddressType address_type, - bool show_types, bool show_summary, bool verbose); - bool ReadFromMemory(ExecutionContext *exe_ctx, lldb::addr_t address, AddressType address_type, DataExtractor &data); diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index eb6e453e1aec0..56d09db837051 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -384,14 +384,6 @@ class TypeSystem : public PluginInterface, dump(lldb::opaque_compiler_type_t type) const = 0; #endif - virtual void DumpValue(lldb::opaque_compiler_type_t type, - ExecutionContext *exe_ctx, Stream &s, - lldb::Format format, const DataExtractor &data, - lldb::offset_t data_offset, size_t data_byte_size, - uint32_t bitfield_bit_size, - uint32_t bitfield_bit_offset, bool show_types, - bool show_summary, bool verbose, uint32_t depth) = 0; - virtual bool DumpTypeValue(lldb::opaque_compiler_type_t type, Stream &s, lldb::Format format, const DataExtractor &data, lldb::offset_t data_offset, size_t data_byte_size, @@ -418,16 +410,9 @@ class TypeSystem : public PluginInterface, /// This should not modify the state of the TypeSystem if possible. virtual void Dump(llvm::raw_ostream &output) = 0; - // TODO: These methods appear unused. Should they be removed? 
- + /// This is used by swift. virtual bool IsRuntimeGeneratedType(lldb::opaque_compiler_type_t type) = 0; - virtual void DumpSummary(lldb::opaque_compiler_type_t type, - ExecutionContext *exe_ctx, Stream &s, - const DataExtractor &data, - lldb::offset_t data_offset, - size_t data_byte_size) = 0; - // TODO: Determine if these methods should move to TypeSystemClang. virtual bool IsPointerOrReferenceType(lldb::opaque_compiler_type_t type, @@ -435,9 +420,6 @@ class TypeSystem : public PluginInterface, virtual unsigned GetTypeQualifiers(lldb::opaque_compiler_type_t type) = 0; - virtual bool IsCStringType(lldb::opaque_compiler_type_t type, - uint32_t &length) = 0; - virtual std::optional GetTypeBitAlign(lldb::opaque_compiler_type_t type, ExecutionContextScope *exe_scope) = 0; diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 69cff0f35ae4a..ddfe5b1a7c52d 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -8512,380 +8512,6 @@ void TypeSystemClang::DumpFromSymbolFile(Stream &s, } } -void TypeSystemClang::DumpValue( - lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, Stream &s, - lldb::Format format, const lldb_private::DataExtractor &data, - lldb::offset_t data_byte_offset, size_t data_byte_size, - uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, bool show_types, - bool show_summary, bool verbose, uint32_t depth) { - if (!type) - return; - - clang::QualType qual_type(GetQualType(type)); - switch (qual_type->getTypeClass()) { - case clang::Type::Record: - if (GetCompleteType(type)) { - const clang::RecordType *record_type = - llvm::cast(qual_type.getTypePtr()); - const clang::RecordDecl *record_decl = record_type->getDecl(); - assert(record_decl); - uint32_t field_bit_offset = 0; - uint32_t field_byte_offset = 0; - const clang::ASTRecordLayout &record_layout = - 
getASTContext().getASTRecordLayout(record_decl); - uint32_t child_idx = 0; - - const clang::CXXRecordDecl *cxx_record_decl = - llvm::dyn_cast(record_decl); - if (cxx_record_decl) { - // We might have base classes to print out first - clang::CXXRecordDecl::base_class_const_iterator base_class, - base_class_end; - for (base_class = cxx_record_decl->bases_begin(), - base_class_end = cxx_record_decl->bases_end(); - base_class != base_class_end; ++base_class) { - const clang::CXXRecordDecl *base_class_decl = - llvm::cast( - base_class->getType()->getAs()->getDecl()); - - // Skip empty base classes - if (!verbose && !TypeSystemClang::RecordHasFields(base_class_decl)) - continue; - - if (base_class->isVirtual()) - field_bit_offset = - record_layout.getVBaseClassOffset(base_class_decl) - .getQuantity() * - 8; - else - field_bit_offset = record_layout.getBaseClassOffset(base_class_decl) - .getQuantity() * - 8; - field_byte_offset = field_bit_offset / 8; - assert(field_bit_offset % 8 == 0); - if (child_idx == 0) - s.PutChar('{'); - else - s.PutChar(','); - - clang::QualType base_class_qual_type = base_class->getType(); - std::string base_class_type_name(base_class_qual_type.getAsString()); - - // Indent and print the base class type name - s.Format("\n{0}{1}", llvm::fmt_repeat(" ", depth + DEPTH_INCREMENT), - base_class_type_name); - - clang::TypeInfo base_class_type_info = - getASTContext().getTypeInfo(base_class_qual_type); - - // Dump the value of the member - CompilerType base_clang_type = GetType(base_class_qual_type); - base_clang_type.DumpValue( - exe_ctx, - &s, // Stream to dump to - base_clang_type - .GetFormat(), // The format with which to display the member - data, // Data buffer containing all bytes for this type - data_byte_offset + field_byte_offset, // Offset into "data" where - // to grab value from - base_class_type_info.Width / 8, // Size of this type in bytes - 0, // Bitfield bit size - 0, // Bitfield bit offset - show_types, // Boolean indicating if we 
should show the variable - // types - show_summary, // Boolean indicating if we should show a summary - // for the current type - verbose, // Verbose output? - depth + DEPTH_INCREMENT); // Scope depth for any types that have - // children - - ++child_idx; - } - } - uint32_t field_idx = 0; - clang::RecordDecl::field_iterator field, field_end; - for (field = record_decl->field_begin(), - field_end = record_decl->field_end(); - field != field_end; ++field, ++field_idx, ++child_idx) { - // Print the starting squiggly bracket (if this is the first member) or - // comma (for member 2 and beyond) for the struct/union/class member. - if (child_idx == 0) - s.PutChar('{'); - else - s.PutChar(','); - - // Indent - s.Printf("\n%*s", depth + DEPTH_INCREMENT, ""); - - clang::QualType field_type = field->getType(); - // Print the member type if requested - // Figure out the type byte size (field_type_info.first) and alignment - // (field_type_info.second) from the AST context. - clang::TypeInfo field_type_info = - getASTContext().getTypeInfo(field_type); - assert(field_idx < record_layout.getFieldCount()); - // Figure out the field offset within the current struct/union/class - // type - field_bit_offset = record_layout.getFieldOffset(field_idx); - field_byte_offset = field_bit_offset / 8; - uint32_t field_bitfield_bit_size = 0; - uint32_t field_bitfield_bit_offset = 0; - if (FieldIsBitfield(*field, field_bitfield_bit_size)) - field_bitfield_bit_offset = field_bit_offset % 8; - - if (show_types) { - std::string field_type_name(field_type.getAsString()); - if (field_bitfield_bit_size > 0) - s.Printf("(%s:%u) ", field_type_name.c_str(), - field_bitfield_bit_size); - else - s.Printf("(%s) ", field_type_name.c_str()); - } - // Print the member name and equal sign - s.Printf("%s = ", field->getNameAsString().c_str()); - - // Dump the value of the member - CompilerType field_clang_type = GetType(field_type); - field_clang_type.DumpValue( - exe_ctx, - &s, // Stream to dump to - 
field_clang_type - .GetFormat(), // The format with which to display the member - data, // Data buffer containing all bytes for this type - data_byte_offset + field_byte_offset, // Offset into "data" where to - // grab value from - field_type_info.Width / 8, // Size of this type in bytes - field_bitfield_bit_size, // Bitfield bit size - field_bitfield_bit_offset, // Bitfield bit offset - show_types, // Boolean indicating if we should show the variable - // types - show_summary, // Boolean indicating if we should show a summary for - // the current type - verbose, // Verbose output? - depth + DEPTH_INCREMENT); // Scope depth for any types that have - // children - } - - // Indent the trailing squiggly bracket - if (child_idx > 0) - s.Printf("\n%*s}", depth, ""); - } - return; - - case clang::Type::Enum: - if (GetCompleteType(type)) { - const clang::EnumType *enutype = - llvm::cast(qual_type.getTypePtr()); - const clang::EnumDecl *enum_decl = enutype->getDecl(); - assert(enum_decl); - clang::EnumDecl::enumerator_iterator enum_pos, enum_end_pos; - lldb::offset_t offset = data_byte_offset; - const int64_t enum_value = data.GetMaxU64Bitfield( - &offset, data_byte_size, bitfield_bit_size, bitfield_bit_offset); - for (enum_pos = enum_decl->enumerator_begin(), - enum_end_pos = enum_decl->enumerator_end(); - enum_pos != enum_end_pos; ++enum_pos) { - if (enum_pos->getInitVal() == enum_value) { - s.Printf("%s", enum_pos->getNameAsString().c_str()); - return; - } - } - // If we have gotten here we didn't get find the enumerator in the enum - // decl, so just print the integer. 
- s.Printf("%" PRIi64, enum_value); - } - return; - - case clang::Type::ConstantArray: { - const clang::ConstantArrayType *array = - llvm::cast(qual_type.getTypePtr()); - bool is_array_of_characters = false; - clang::QualType element_qual_type = array->getElementType(); - - const clang::Type *canonical_type = - element_qual_type->getCanonicalTypeInternal().getTypePtr(); - if (canonical_type) - is_array_of_characters = canonical_type->isCharType(); - - const uint64_t element_count = array->getSize().getLimitedValue(); - - clang::TypeInfo field_type_info = - getASTContext().getTypeInfo(element_qual_type); - - uint32_t element_idx = 0; - uint32_t element_offset = 0; - uint64_t element_byte_size = field_type_info.Width / 8; - uint32_t element_stride = element_byte_size; - - if (is_array_of_characters) { - s.PutChar('"'); - DumpDataExtractor(data, &s, data_byte_offset, lldb::eFormatChar, - element_byte_size, element_count, UINT32_MAX, - LLDB_INVALID_ADDRESS, 0, 0); - s.PutChar('"'); - return; - } else { - CompilerType element_clang_type = GetType(element_qual_type); - lldb::Format element_format = element_clang_type.GetFormat(); - - for (element_idx = 0; element_idx < element_count; ++element_idx) { - // Print the starting squiggly bracket (if this is the first member) or - // comman (for member 2 and beyong) for the struct/union/class member. 
- if (element_idx == 0) - s.PutChar('{'); - else - s.PutChar(','); - - // Indent and print the index - s.Printf("\n%*s[%u] ", depth + DEPTH_INCREMENT, "", element_idx); - - // Figure out the field offset within the current struct/union/class - // type - element_offset = element_idx * element_stride; - - // Dump the value of the member - element_clang_type.DumpValue( - exe_ctx, - &s, // Stream to dump to - element_format, // The format with which to display the element - data, // Data buffer containing all bytes for this type - data_byte_offset + - element_offset, // Offset into "data" where to grab value from - element_byte_size, // Size of this type in bytes - 0, // Bitfield bit size - 0, // Bitfield bit offset - show_types, // Boolean indicating if we should show the variable - // types - show_summary, // Boolean indicating if we should show a summary for - // the current type - verbose, // Verbose output? - depth + DEPTH_INCREMENT); // Scope depth for any types that have - // children - } - - // Indent the trailing squiggly bracket - if (element_idx > 0) - s.Printf("\n%*s}", depth, ""); - } - } - return; - - case clang::Type::Typedef: { - clang::QualType typedef_qual_type = - llvm::cast(qual_type) - ->getDecl() - ->getUnderlyingType(); - - CompilerType typedef_clang_type = GetType(typedef_qual_type); - lldb::Format typedef_format = typedef_clang_type.GetFormat(); - clang::TypeInfo typedef_type_info = - getASTContext().getTypeInfo(typedef_qual_type); - uint64_t typedef_byte_size = typedef_type_info.Width / 8; - - return typedef_clang_type.DumpValue( - exe_ctx, - &s, // Stream to dump to - typedef_format, // The format with which to display the element - data, // Data buffer containing all bytes for this type - data_byte_offset, // Offset into "data" where to grab value from - typedef_byte_size, // Size of this type in bytes - bitfield_bit_size, // Bitfield bit size - bitfield_bit_offset, // Bitfield bit offset - show_types, // Boolean indicating if we should show 
the variable types - show_summary, // Boolean indicating if we should show a summary for the - // current type - verbose, // Verbose output? - depth); // Scope depth for any types that have children - } break; - - case clang::Type::Auto: { - clang::QualType elaborated_qual_type = - llvm::cast(qual_type)->getDeducedType(); - CompilerType elaborated_clang_type = GetType(elaborated_qual_type); - lldb::Format elaborated_format = elaborated_clang_type.GetFormat(); - clang::TypeInfo elaborated_type_info = - getASTContext().getTypeInfo(elaborated_qual_type); - uint64_t elaborated_byte_size = elaborated_type_info.Width / 8; - - return elaborated_clang_type.DumpValue( - exe_ctx, - &s, // Stream to dump to - elaborated_format, // The format with which to display the element - data, // Data buffer containing all bytes for this type - data_byte_offset, // Offset into "data" where to grab value from - elaborated_byte_size, // Size of this type in bytes - bitfield_bit_size, // Bitfield bit size - bitfield_bit_offset, // Bitfield bit offset - show_types, // Boolean indicating if we should show the variable types - show_summary, // Boolean indicating if we should show a summary for the - // current type - verbose, // Verbose output? 
- depth); // Scope depth for any types that have children - } break; - - case clang::Type::Elaborated: { - clang::QualType elaborated_qual_type = - llvm::cast(qual_type)->getNamedType(); - CompilerType elaborated_clang_type = GetType(elaborated_qual_type); - lldb::Format elaborated_format = elaborated_clang_type.GetFormat(); - clang::TypeInfo elaborated_type_info = - getASTContext().getTypeInfo(elaborated_qual_type); - uint64_t elaborated_byte_size = elaborated_type_info.Width / 8; - - return elaborated_clang_type.DumpValue( - exe_ctx, - &s, // Stream to dump to - elaborated_format, // The format with which to display the element - data, // Data buffer containing all bytes for this type - data_byte_offset, // Offset into "data" where to grab value from - elaborated_byte_size, // Size of this type in bytes - bitfield_bit_size, // Bitfield bit size - bitfield_bit_offset, // Bitfield bit offset - show_types, // Boolean indicating if we should show the variable types - show_summary, // Boolean indicating if we should show a summary for the - // current type - verbose, // Verbose output? 
- depth); // Scope depth for any types that have children - } break; - - case clang::Type::Paren: { - clang::QualType desugar_qual_type = - llvm::cast(qual_type)->desugar(); - CompilerType desugar_clang_type = GetType(desugar_qual_type); - - lldb::Format desugar_format = desugar_clang_type.GetFormat(); - clang::TypeInfo desugar_type_info = - getASTContext().getTypeInfo(desugar_qual_type); - uint64_t desugar_byte_size = desugar_type_info.Width / 8; - - return desugar_clang_type.DumpValue( - exe_ctx, - &s, // Stream to dump to - desugar_format, // The format with which to display the element - data, // Data buffer containing all bytes for this type - data_byte_offset, // Offset into "data" where to grab value from - desugar_byte_size, // Size of this type in bytes - bitfield_bit_size, // Bitfield bit size - bitfield_bit_offset, // Bitfield bit offset - show_types, // Boolean indicating if we should show the variable types - show_summary, // Boolean indicating if we should show a summary for the - // current type - verbose, // Verbose output? - depth); // Scope depth for any types that have children - } break; - - default: - // We are down to a scalar type that we just need to display. 
- DumpDataExtractor(data, &s, data_byte_offset, format, data_byte_size, 1, - UINT32_MAX, LLDB_INVALID_ADDRESS, bitfield_bit_size, - bitfield_bit_offset); - - if (show_summary) - DumpSummary(type, exe_ctx, s, data, data_byte_offset, data_byte_size); - break; - } -} - static bool DumpEnumValue(const clang::QualType &qual_type, Stream &s, const DataExtractor &data, lldb::offset_t byte_offset, size_t byte_size, uint32_t bitfield_bit_offset, @@ -9091,51 +8717,6 @@ bool TypeSystemClang::DumpTypeValue( return false; } -void TypeSystemClang::DumpSummary(lldb::opaque_compiler_type_t type, - ExecutionContext *exe_ctx, Stream &s, - const lldb_private::DataExtractor &data, - lldb::offset_t data_byte_offset, - size_t data_byte_size) { - uint32_t length = 0; - if (IsCStringType(type, length)) { - if (exe_ctx) { - Process *process = exe_ctx->GetProcessPtr(); - if (process) { - lldb::offset_t offset = data_byte_offset; - lldb::addr_t pointer_address = data.GetMaxU64(&offset, data_byte_size); - std::vector buf; - if (length > 0) - buf.resize(length); - else - buf.resize(256); - - DataExtractor cstr_data(&buf.front(), buf.size(), - process->GetByteOrder(), 4); - buf.back() = '\0'; - size_t bytes_read; - size_t total_cstr_len = 0; - Status error; - while ((bytes_read = process->ReadMemory(pointer_address, &buf.front(), - buf.size(), error)) > 0) { - const size_t len = strlen((const char *)&buf.front()); - if (len == 0) - break; - if (total_cstr_len == 0) - s.PutCString(" \""); - DumpDataExtractor(cstr_data, &s, 0, lldb::eFormatChar, 1, len, - UINT32_MAX, LLDB_INVALID_ADDRESS, 0, 0); - total_cstr_len += len; - if (len < buf.size()) - break; - pointer_address += total_cstr_len; - } - if (total_cstr_len > 0) - s.PutChar('"'); - } - } - } -} - void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type, lldb::DescriptionLevel level) { StreamFile s(stdout, false); diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h 
b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 0544de3cd33be..1d2f25c47b8c7 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -632,8 +632,7 @@ class TypeSystemClang : public TypeSystem { bool IsConst(lldb::opaque_compiler_type_t type) override; - bool IsCStringType(lldb::opaque_compiler_type_t type, - uint32_t &length) override; + bool IsCStringType(lldb::opaque_compiler_type_t type, uint32_t &length); static bool IsCXXClassType(const CompilerType &type); @@ -1029,23 +1028,12 @@ class TypeSystemClang : public TypeSystem { /// The name of the symbol to dump, if it is empty dump all the symbols void DumpFromSymbolFile(Stream &s, llvm::StringRef symbol_name); - void DumpValue(lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, - Stream &s, lldb::Format format, const DataExtractor &data, - lldb::offset_t data_offset, size_t data_byte_size, - uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, - bool show_types, bool show_summary, bool verbose, - uint32_t depth) override; - bool DumpTypeValue(lldb::opaque_compiler_type_t type, Stream &s, lldb::Format format, const DataExtractor &data, lldb::offset_t data_offset, size_t data_byte_size, uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, ExecutionContextScope *exe_scope) override; - void DumpSummary(lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, - Stream &s, const DataExtractor &data, - lldb::offset_t data_offset, size_t data_byte_size) override; - void DumpTypeDescription( lldb::opaque_compiler_type_t type, lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override; diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp index 33f7e63d9be41..7732a66f49d8d 100644 --- a/lldb/source/Symbol/CompilerType.cpp +++ b/lldb/source/Symbol/CompilerType.cpp @@ -108,13 +108,6 @@ bool CompilerType::IsConst() const { return false; } -bool 
CompilerType::IsCStringType(uint32_t &length) const { - if (IsValid()) - if (auto type_system_sp = GetTypeSystem()) - return type_system_sp->IsCStringType(m_type, length); - return false; -} - bool CompilerType::IsFunctionType() const { if (IsValid()) if (auto type_system_sp = GetTypeSystem()) @@ -821,20 +814,6 @@ CompilerType::GetIndexOfChildWithName(llvm::StringRef name, // Dumping types -void CompilerType::DumpValue(ExecutionContext *exe_ctx, Stream *s, - lldb::Format format, const DataExtractor &data, - lldb::offset_t data_byte_offset, - size_t data_byte_size, uint32_t bitfield_bit_size, - uint32_t bitfield_bit_offset, bool show_types, - bool show_summary, bool verbose, uint32_t depth) { - if (!IsValid()) - if (auto type_system_sp = GetTypeSystem()) - type_system_sp->DumpValue(m_type, exe_ctx, *s, format, data, - data_byte_offset, data_byte_size, - bitfield_bit_size, bitfield_bit_offset, - show_types, show_summary, verbose, depth); -} - bool CompilerType::DumpTypeValue(Stream *s, lldb::Format format, const DataExtractor &data, lldb::offset_t byte_offset, size_t byte_size, @@ -849,16 +828,6 @@ bool CompilerType::DumpTypeValue(Stream *s, lldb::Format format, return false; } -void CompilerType::DumpSummary(ExecutionContext *exe_ctx, Stream *s, - const DataExtractor &data, - lldb::offset_t data_byte_offset, - size_t data_byte_size) { - if (IsValid()) - if (auto type_system_sp = GetTypeSystem()) - type_system_sp->DumpSummary(m_type, exe_ctx, *s, data, data_byte_offset, - data_byte_size); -} - void CompilerType::DumpTypeDescription(lldb::DescriptionLevel level) const { if (IsValid()) if (auto type_system_sp = GetTypeSystem()) diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp index 66284eb73cad0..5f4c6303334a2 100644 --- a/lldb/source/Symbol/Type.cpp +++ b/lldb/source/Symbol/Type.cpp @@ -312,30 +312,6 @@ ConstString Type::GetBaseName() { void Type::DumpTypeName(Stream *s) { GetName().Dump(s, ""); } -void Type::DumpValue(ExecutionContext *exe_ctx, 
Stream *s, - const DataExtractor &data, uint32_t data_byte_offset, - bool show_types, bool show_summary, bool verbose, - lldb::Format format) { - if (ResolveCompilerType(ResolveState::Forward)) { - if (show_types) { - s->PutChar('('); - if (verbose) - s->Printf("Type{0x%8.8" PRIx64 "} ", GetID()); - DumpTypeName(s); - s->PutCString(") "); - } - - GetForwardCompilerType().DumpValue( - exe_ctx, s, format == lldb::eFormatDefault ? GetFormat() : format, data, - data_byte_offset, - GetByteSize(exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr) - .value_or(0), - 0, // Bitfield bit size - 0, // Bitfield bit offset - show_types, show_summary, verbose, 0); - } -} - Type *Type::GetEncodingType() { if (m_encoding_type == nullptr && m_encoding_uid != LLDB_INVALID_UID) m_encoding_type = m_symbol_file->ResolveTypeUID(m_encoding_uid); @@ -416,24 +392,6 @@ lldb::Encoding Type::GetEncoding(uint64_t &count) { return GetForwardCompilerType().GetEncoding(count); } -bool Type::DumpValueInMemory(ExecutionContext *exe_ctx, Stream *s, - lldb::addr_t address, AddressType address_type, - bool show_types, bool show_summary, bool verbose) { - if (address != LLDB_INVALID_ADDRESS) { - DataExtractor data; - Target *target = nullptr; - if (exe_ctx) - target = exe_ctx->GetTargetPtr(); - if (target) - data.SetByteOrder(target->GetArchitecture().GetByteOrder()); - if (ReadFromMemory(exe_ctx, address, address_type, data)) { - DumpValue(exe_ctx, s, data, 0, show_types, show_summary, verbose); - return true; - } - } - return false; -} - bool Type::ReadFromMemory(ExecutionContext *exe_ctx, lldb::addr_t addr, AddressType address_type, DataExtractor &data) { if (address_type == eAddressTypeFile) { From 475e154331af19f175ec082b08547b155bba1577 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 13 Oct 2023 10:54:54 -0700 Subject: [PATCH 092/720] [libc++] Introduce make_test_jthread for jthread tests (#68837) This patch introduces the support::make_test_jthread utility which is basically the 
same as support::make_test_thread but for std::jthread. It allows vendors to maintain a downstream way to create threads for use within the test suite, which is especially useful for embedded platforms. --- .../thread.jthread/assign.move.pass.cpp | 37 ++++++++++--------- .../thread/thread.jthread/cons.move.pass.cpp | 9 +++-- .../std/thread/thread.jthread/detach.pass.cpp | 7 ++-- .../std/thread/thread.jthread/dtor.pass.cpp | 10 +++-- .../std/thread/thread.jthread/get_id.pass.cpp | 3 +- .../thread.jthread/get_stop_source.pass.cpp | 3 +- .../thread.jthread/get_stop_token.pass.cpp | 3 +- .../thread.jthread/join.deadlock.pass.cpp | 5 ++- .../std/thread/thread.jthread/join.pass.cpp | 11 +++--- .../thread/thread.jthread/joinable.pass.cpp | 7 ++-- .../thread.jthread/request_stop.pass.cpp | 5 ++- .../thread/thread.jthread/swap.free.pass.cpp | 9 +++-- .../thread.jthread/swap.member.pass.cpp | 9 +++-- libcxx/test/support/make_test_thread.h | 27 ++++++++++++++ 14 files changed, 93 insertions(+), 52 deletions(-) diff --git a/libcxx/test/std/thread/thread.jthread/assign.move.pass.cpp b/libcxx/test/std/thread/thread.jthread/assign.move.pass.cpp index b932ac39d2f37..89521ad7660a1 100644 --- a/libcxx/test/std/thread/thread.jthread/assign.move.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/assign.move.pass.cpp @@ -23,6 +23,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" static_assert(std::is_nothrow_move_assignable_v); @@ -30,10 +31,10 @@ static_assert(std::is_nothrow_move_assignable_v); int main(int, char**) { // If &x == this is true, there are no effects. 
{ - std::jthread j([] {}); - auto id = j.get_id(); - auto ssource = j.get_stop_source(); - j = std::move(j); + std::jthread j = support::make_test_jthread([] {}); + auto id = j.get_id(); + auto ssource = j.get_stop_source(); + j = std::move(j); assert(j.get_id() == id); assert(j.get_stop_source() == ssource); } @@ -41,12 +42,12 @@ int main(int, char**) { // if joinable() is true, calls request_stop() and then join() // request_stop is called { - std::jthread j1([] {}); - bool called = false; + std::jthread j1 = support::make_test_jthread([] {}); + bool called = false; std::stop_callback cb(j1.get_stop_token(), [&called] { called = true; }); - std::jthread j2([] {}); - j1 = std::move(j2); + std::jthread j2 = support::make_test_jthread([] {}); + j1 = std::move(j2); assert(called); } @@ -58,10 +59,10 @@ int main(int, char**) { constexpr auto numberOfThreads = 10u; jts.reserve(numberOfThreads); for (auto i = 0u; i < numberOfThreads; ++i) { - jts.emplace_back([&] { + jts.emplace_back(support::make_test_jthread([&] { std::this_thread::sleep_for(std::chrono::milliseconds(2)); calledTimes.fetch_add(1, std::memory_order_relaxed); - }); + })); } for (auto i = 0u; i < numberOfThreads; ++i) { @@ -79,10 +80,10 @@ int main(int, char**) { // then assigns the state of x to *this { - std::jthread j1([] {}); - std::jthread j2([] {}); - auto id2 = j2.get_id(); - auto ssource2 = j2.get_stop_source(); + std::jthread j1 = support::make_test_jthread([] {}); + std::jthread j2 = support::make_test_jthread([] {}); + auto id2 = j2.get_id(); + auto ssource2 = j2.get_stop_source(); j1 = std::move(j2); @@ -92,9 +93,9 @@ int main(int, char**) { // sets x to a default constructed state { - std::jthread j1([] {}); - std::jthread j2([] {}); - j1 = std::move(j2); + std::jthread j1 = support::make_test_jthread([] {}); + std::jthread j2 = support::make_test_jthread([] {}); + j1 = std::move(j2); assert(j2.get_id() == std::jthread::id()); assert(!j2.get_stop_source().stop_possible()); @@ -103,7 +104,7 
@@ int main(int, char**) { // joinable is false { std::jthread j1; - std::jthread j2([] {}); + std::jthread j2 = support::make_test_jthread([] {}); auto j2Id = j2.get_id(); diff --git a/libcxx/test/std/thread/thread.jthread/cons.move.pass.cpp b/libcxx/test/std/thread/thread.jthread/cons.move.pass.cpp index 9eacf8971c2a5..c3c04467703c9 100644 --- a/libcxx/test/std/thread/thread.jthread/cons.move.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/cons.move.pass.cpp @@ -19,6 +19,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" static_assert(std::is_nothrow_move_constructible_v); @@ -27,8 +28,8 @@ int main(int, char**) { { // x.get_id() == id() and get_id() returns the value of x.get_id() prior // to the start of construction. - std::jthread j1{[] {}}; - auto id1 = j1.get_id(); + std::jthread j1 = support::make_test_jthread([] {}); + auto id1 = j1.get_id(); std::jthread j2(std::move(j1)); assert(j1.get_id() == std::jthread::id()); @@ -38,8 +39,8 @@ int main(int, char**) { { // ssource has the value of x.ssource prior to the start of construction // and x.ssource.stop_possible() is false. 
- std::jthread j1{[] {}}; - auto ss1 = j1.get_stop_source(); + std::jthread j1 = support::make_test_jthread([] {}); + auto ss1 = j1.get_stop_source(); std::jthread j2(std::move(j1)); assert(ss1 == j2.get_stop_source()); diff --git a/libcxx/test/std/thread/thread.jthread/detach.pass.cpp b/libcxx/test/std/thread/thread.jthread/detach.pass.cpp index ee48d2691e684..54fd5fd93bed6 100644 --- a/libcxx/test/std/thread/thread.jthread/detach.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/detach.pass.cpp @@ -23,6 +23,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" int main(int, char**) { @@ -30,10 +31,10 @@ int main(int, char**) { { std::atomic_bool start{false}; std::atomic_bool done{false}; - std::optional jt{[&start, &done] { + std::optional jt = support::make_test_jthread([&start, &done] { start.wait(false); done = true; - }}; + }); // If it blocks, it will deadlock here jt->detach(); @@ -49,7 +50,7 @@ int main(int, char**) { // Postconditions: get_id() == id(). { - std::jthread jt{[] {}}; + std::jthread jt = support::make_test_jthread([] {}); assert(jt.get_id() != std::jthread::id()); jt.detach(); assert(jt.get_id() == std::jthread::id()); diff --git a/libcxx/test/std/thread/thread.jthread/dtor.pass.cpp b/libcxx/test/std/thread/thread.jthread/dtor.pass.cpp index 47ee62023f62d..35be0f6c0dd82 100644 --- a/libcxx/test/std/thread/thread.jthread/dtor.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/dtor.pass.cpp @@ -20,6 +20,8 @@ #include #include #include + +#include "make_test_thread.h" #include "test_macros.h" int main(int, char**) { @@ -32,8 +34,8 @@ int main(int, char**) { // If joinable() is true, calls request_stop() and then join(). 
// request_stop is called { - std::optional jt([] {}); - bool called = false; + std::optional jt = support::make_test_jthread([] {}); + bool called = false; std::stop_callback cb(jt->get_stop_token(), [&called] { called = true; }); jt.reset(); assert(called); @@ -48,10 +50,10 @@ int main(int, char**) { constexpr auto numberOfThreads = 10u; jts.reserve(numberOfThreads); for (auto i = 0u; i < numberOfThreads; ++i) { - jts.emplace_back([&calledTimes] { + jts.emplace_back(support::make_test_jthread([&calledTimes] { std::this_thread::sleep_for(std::chrono::milliseconds{2}); calledTimes.fetch_add(1, std::memory_order_relaxed); - }); + })); } jts.clear(); diff --git a/libcxx/test/std/thread/thread.jthread/get_id.pass.cpp b/libcxx/test/std/thread/thread.jthread/get_id.pass.cpp index f92472d3d8dc6..b3a2beff9f416 100644 --- a/libcxx/test/std/thread/thread.jthread/get_id.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/get_id.pass.cpp @@ -18,6 +18,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" static_assert(noexcept(std::declval().get_id())); @@ -32,7 +33,7 @@ int main(int, char**) { // Represents a thread { - const std::jthread jt{[] {}}; + const std::jthread jt = support::make_test_jthread([] {}); std::same_as decltype(auto) result = jt.get_id(); assert(result != std::jthread::id()); } diff --git a/libcxx/test/std/thread/thread.jthread/get_stop_source.pass.cpp b/libcxx/test/std/thread/thread.jthread/get_stop_source.pass.cpp index 41df2d894f45d..8f35db297b749 100644 --- a/libcxx/test/std/thread/thread.jthread/get_stop_source.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/get_stop_source.pass.cpp @@ -19,6 +19,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" static_assert(noexcept(std::declval().get_stop_source())); @@ -26,7 +27,7 @@ static_assert(noexcept(std::declval().get_stop_source())); int main(int, char**) { // Represents a thread { - std::jthread jt{[] {}}; + std::jthread jt = 
support::make_test_jthread([] {}); std::same_as decltype(auto) result = jt.get_stop_source(); assert(result.stop_possible()); } diff --git a/libcxx/test/std/thread/thread.jthread/get_stop_token.pass.cpp b/libcxx/test/std/thread/thread.jthread/get_stop_token.pass.cpp index c65d39b3cdf4a..070761e0a3ab8 100644 --- a/libcxx/test/std/thread/thread.jthread/get_stop_token.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/get_stop_token.pass.cpp @@ -20,6 +20,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" static_assert(noexcept(std::declval().get_stop_token())); @@ -27,7 +28,7 @@ static_assert(noexcept(std::declval().get_stop_token())); int main(int, char**) { // Represents a thread { - std::jthread jt{[] {}}; + std::jthread jt = support::make_test_jthread([] {}); auto ss = jt.get_stop_source(); std::same_as decltype(auto) st = std::as_const(jt).get_stop_token(); diff --git a/libcxx/test/std/thread/thread.jthread/join.deadlock.pass.cpp b/libcxx/test/std/thread/thread.jthread/join.deadlock.pass.cpp index aa5cdf2783dba..8e2f1e5f5d9d4 100644 --- a/libcxx/test/std/thread/thread.jthread/join.deadlock.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/join.deadlock.pass.cpp @@ -31,6 +31,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" int main(int, char**) { @@ -40,12 +41,12 @@ int main(int, char**) { std::atomic_bool start = false; std::atomic_bool done = false; - std::jthread jt{[&] { + std::jthread jt = support::make_test_jthread([&] { start.wait(false); f(); done = true; done.notify_all(); - }}; + }); f = [&] { try { diff --git a/libcxx/test/std/thread/thread.jthread/join.pass.cpp b/libcxx/test/std/thread/thread.jthread/join.pass.cpp index 38986bdfed8d7..2bafd86338247 100644 --- a/libcxx/test/std/thread/thread.jthread/join.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/join.pass.cpp @@ -23,6 +23,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" int main(int, char**) { @@ 
-33,10 +34,10 @@ int main(int, char**) { constexpr auto numberOfThreads = 10u; jts.reserve(numberOfThreads); for (auto i = 0u; i < numberOfThreads; ++i) { - jts.emplace_back([&] { + jts.emplace_back(support::make_test_jthread([&] { std::this_thread::sleep_for(std::chrono::milliseconds(2)); calledTimes.fetch_add(1, std::memory_order_relaxed); - }); + })); } for (auto i = 0u; i < numberOfThreads; ++i) { @@ -55,15 +56,15 @@ int main(int, char**) { // Synchronization: The completion of the thread represented by *this synchronizes with // ([intro.multithread]) the corresponding successful join() return. { - bool flag = false; - std::jthread jt{[&] { flag = true; }}; + bool flag = false; + std::jthread jt = support::make_test_jthread([&] { flag = true; }); jt.join(); assert(flag); // non atomic write is visible to the current thread } // Postconditions: The thread represented by *this has completed. get_id() == id(). { - std::jthread jt{[] {}}; + std::jthread jt = support::make_test_jthread([] {}); assert(jt.get_id() != std::jthread::id()); jt.join(); assert(jt.get_id() == std::jthread::id()); diff --git a/libcxx/test/std/thread/thread.jthread/joinable.pass.cpp b/libcxx/test/std/thread/thread.jthread/joinable.pass.cpp index 5c0fbece4c21e..3a88100d934db 100644 --- a/libcxx/test/std/thread/thread.jthread/joinable.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/joinable.pass.cpp @@ -19,6 +19,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" static_assert(noexcept(std::declval().joinable())); @@ -33,7 +34,7 @@ int main(int, char**) { // Non-default constructed { - const std::jthread jt{[] {}}; + const std::jthread jt = support::make_test_jthread([] {}); std::same_as decltype(auto) result = jt.joinable(); assert(result); } @@ -41,8 +42,8 @@ int main(int, char**) { // Non-default constructed // the thread of execution has not finished { - std::atomic_bool done = false; - const std::jthread jt{[&done] { done.wait(false); }}; + std::atomic_bool 
done = false; + const std::jthread jt = support::make_test_jthread([&done] { done.wait(false); }); std::same_as decltype(auto) result = jt.joinable(); done = true; done.notify_all(); diff --git a/libcxx/test/std/thread/thread.jthread/request_stop.pass.cpp b/libcxx/test/std/thread/thread.jthread/request_stop.pass.cpp index f1109561cf9f2..ccbea9f145e50 100644 --- a/libcxx/test/std/thread/thread.jthread/request_stop.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/request_stop.pass.cpp @@ -19,6 +19,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" static_assert(noexcept(std::declval().request_stop())); @@ -26,8 +27,8 @@ static_assert(noexcept(std::declval().request_stop())); int main(int, char**) { // Represents a thread { - std::jthread jt{[] {}}; - auto st = jt.get_stop_token(); + std::jthread jt = support::make_test_jthread([] {}); + auto st = jt.get_stop_token(); assert(!st.stop_requested()); std::same_as decltype(auto) result = jt.request_stop(); assert(result); diff --git a/libcxx/test/std/thread/thread.jthread/swap.free.pass.cpp b/libcxx/test/std/thread/thread.jthread/swap.free.pass.cpp index 776537cdff483..01c8ccd659687 100644 --- a/libcxx/test/std/thread/thread.jthread/swap.free.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/swap.free.pass.cpp @@ -17,6 +17,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" template @@ -30,7 +31,7 @@ int main(int, char**) { // x is default constructed { std::jthread t1; - std::jthread t2{[] {}}; + std::jthread t2 = support::make_test_jthread([] {}); const auto originalId2 = t2.get_id(); swap(t1, t2); @@ -40,7 +41,7 @@ int main(int, char**) { // y is default constructed { - std::jthread t1([] {}); + std::jthread t1 = support::make_test_jthread([] {}); std::jthread t2{}; const auto originalId1 = t1.get_id(); swap(t1, t2); @@ -51,8 +52,8 @@ int main(int, char**) { // both not default constructed { - std::jthread t1([] {}); - std::jthread t2{[] {}}; + std::jthread 
t1 = support::make_test_jthread([] {}); + std::jthread t2 = support::make_test_jthread([] {}); const auto originalId1 = t1.get_id(); const auto originalId2 = t2.get_id(); swap(t1, t2); diff --git a/libcxx/test/std/thread/thread.jthread/swap.member.pass.cpp b/libcxx/test/std/thread/thread.jthread/swap.member.pass.cpp index 614e3ac8312da..8ae17f435aa31 100644 --- a/libcxx/test/std/thread/thread.jthread/swap.member.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/swap.member.pass.cpp @@ -17,6 +17,7 @@ #include #include +#include "make_test_thread.h" #include "test_macros.h" template @@ -30,7 +31,7 @@ int main(int, char**) { // this is default constructed { std::jthread t1; - std::jthread t2{[] {}}; + std::jthread t2 = support::make_test_jthread([] {}); const auto originalId2 = t2.get_id(); t1.swap(t2); @@ -40,7 +41,7 @@ int main(int, char**) { // that is default constructed { - std::jthread t1([] {}); + std::jthread t1 = support::make_test_jthread([] {}); std::jthread t2{}; const auto originalId1 = t1.get_id(); t1.swap(t2); @@ -51,8 +52,8 @@ int main(int, char**) { // both not default constructed { - std::jthread t1([] {}); - std::jthread t2{[] {}}; + std::jthread t1 = support::make_test_jthread([] {}); + std::jthread t2 = support::make_test_jthread([] {}); const auto originalId1 = t1.get_id(); const auto originalId2 = t2.get_id(); t1.swap(t2); diff --git a/libcxx/test/support/make_test_thread.h b/libcxx/test/support/make_test_thread.h index eaf967e2180ed..cd548fd909d71 100644 --- a/libcxx/test/support/make_test_thread.h +++ b/libcxx/test/support/make_test_thread.h @@ -12,13 +12,40 @@ #include #include +#include "test_macros.h" + namespace support { +// These functions are used to mock the creation of threads within the test suite. +// +// This provides a vendor-friendly way of making the test suite work even on platforms +// where the standard thread constructors don't work (e.g. 
embedded environments where +// creating a thread requires additional information like setting attributes). +// +// Vendors can keep a downstream diff in this file to create threads however they +// need on their platform, and the majority of the test suite will work out of the +// box. Of course, tests that exercise the standard thread constructors won't work, +// but any other test that only creates threads as a side effect of testing should +// work if they use the utilities in this file. + template std::thread make_test_thread(F&& f, Args&& ...args) { return std::thread(std::forward(f), std::forward(args)...); } +#if TEST_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_STOP_TOKEN) +# ifdef _LIBCPP_VERSION +# define TEST_AVAILABILITY_SYNC _LIBCPP_AVAILABILITY_SYNC +# else +# define TEST_AVAILABILITY_SYNC +# endif + +template +TEST_AVAILABILITY_SYNC std::jthread make_test_jthread(F&& f, Args&&... args) { + return std::jthread(std::forward(f), std::forward(args)...); +} +#endif + } // end namespace support #endif // TEST_SUPPORT_MAKE_TEST_THREAD_H From ed0a14144ba980ceb29e86c9ca615b785e667dcf Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Fri, 13 Oct 2023 14:14:23 -0400 Subject: [PATCH 093/720] [LLDB] Fix type formatting empty c-strings (#68924) The type formatter code is effectively considering empty strings as read errors, which is wrong. The fix is very simple. We should rely on the error object and stop checking the size. I also added a test. 
--- lldb/source/DataFormatters/TypeFormat.cpp | 6 +++--- .../builtin-formats/TestBuiltinFormats.py | 16 +++++++++++++++- .../data-formatter/builtin-formats/main.cpp | 2 ++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/lldb/source/DataFormatters/TypeFormat.cpp b/lldb/source/DataFormatters/TypeFormat.cpp index 5ee89fc0d5eb3..126240aeca65e 100644 --- a/lldb/source/DataFormatters/TypeFormat.cpp +++ b/lldb/source/DataFormatters/TypeFormat.cpp @@ -81,9 +81,9 @@ bool TypeFormatImpl_Format::FormatObject(ValueObject *valobj, WritableDataBufferSP buffer_sp( new DataBufferHeap(max_len + 1, 0)); Address address(valobj->GetPointerValue()); - if (target_sp->ReadCStringFromMemory( - address, (char *)buffer_sp->GetBytes(), max_len, error) && - error.Success()) + target_sp->ReadCStringFromMemory( + address, (char *)buffer_sp->GetBytes(), max_len, error); + if (error.Success()) data.SetData(buffer_sp); } } diff --git a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py index aa768c158b5b5..4e0f14d039a74 100644 --- a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py +++ b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py @@ -3,9 +3,9 @@ """ import lldb +from lldbsuite.test import lldbutil from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil class TestCase(TestBase): @@ -19,6 +19,20 @@ def getFormatted(self, format, expr): self.assertTrue(result.Succeeded(), result.GetError()) return result.GetOutput() + @no_debug_info_test + @skipIfWindows + def testAllPlatforms(self): + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + # We can dump correctly non char* c-strings with explicit formatting. 
+ self.assertIn(' = ""', self.getFormatted("c-string", "void_empty_cstring")) + self.assertIn(' = ""', self.getFormatted("c-string", "empty_cstring")) + + # TODO: Move as many asserts as possible within this function to `testAllPlatforms`. + # Currently `arm` is being skipped even though many asserts would effectively + # pass. @no_debug_info_test @skipIfWindows # uint128_t not available on arm. diff --git a/lldb/test/API/functionalities/data-formatter/builtin-formats/main.cpp b/lldb/test/API/functionalities/data-formatter/builtin-formats/main.cpp index 58b8116dfa1ec..573c111306c14 100644 --- a/lldb/test/API/functionalities/data-formatter/builtin-formats/main.cpp +++ b/lldb/test/API/functionalities/data-formatter/builtin-formats/main.cpp @@ -1,8 +1,10 @@ #include const char cstring[15] = " \033\a\b\f\n\r\t\vaA09\0"; +const char *empty_cstring = ""; int main() { int use = *cstring; + void *void_empty_cstring = (void *)empty_cstring; return use; // break here } From b49a0dbaebc7f4023d54d7ea0c4719c5740dcebe Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 13 Oct 2023 19:23:53 +0100 Subject: [PATCH 094/720] [AMDGPU] Fix comments about afn and arcp in fast unsafe fdiv handling (#68982) --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 4 ++-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4797e5a7a61d4..02cb77f6ecaca 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4641,8 +4641,8 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, } } - // For f16 require arcp only. - // For f32 require afn+arcp. + // For f16 require afn or arcp. + // For f32 require afn. 
if (!AllowInaccurateRcp && (ResTy != LLT::scalar(16) || !MI.getFlag(MachineInstr::FmArcp))) return false; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 9bd0f5390b19e..33f65ab786584 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9577,8 +9577,8 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, } } - // For f16 require arcp only. - // For f32 require afn+arcp. + // For f16 require afn or arcp. + // For f32 require afn. if (!AllowInaccurateRcp && (VT != MVT::f16 || !Flags.hasAllowReciprocal())) return SDValue(); From 844c731f2dda3e01984f79b9e68d5d7566c9824c Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov <87100199+kstoimenov@users.noreply.github.com> Date: Fri, 13 Oct 2023 11:43:29 -0700 Subject: [PATCH 095/720] [HWASAN] Mark built-ins as not built-ins to prevent optimizations (#68936) The other 3 sanitizers (ASAN, TSAN and MSAN) all use maybeMarkSanitizerLibraryCallNoBuiltin to make disable optimizations which inline functions like memcmp for example. The lack of this optimization was allowing ExpandMemCmpPass to convert a memcmp call to inlined assembly and cause a false negative in HWASAN. 
--- compiler-rt/test/hwasan/TestCases/memcmp.cpp | 4 +-- .../Instrumentation/HWAddressSanitizer.cpp | 12 +++++-- .../HWAddressSanitizer/str-nobuiltin.ll | 33 +++++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/str-nobuiltin.ll diff --git a/compiler-rt/test/hwasan/TestCases/memcmp.cpp b/compiler-rt/test/hwasan/TestCases/memcmp.cpp index c6a2b42b54d27..5f8a93f62a44a 100644 --- a/compiler-rt/test/hwasan/TestCases/memcmp.cpp +++ b/compiler-rt/test/hwasan/TestCases/memcmp.cpp @@ -11,8 +11,8 @@ int main(int argc, char **argv) { __hwasan_enable_allocator_tagging(); char a[] = {static_cast(argc), 2, 3, 4}; - volatile int size = sizeof(a); - char *volatile p = (char *)malloc(size); + int size = sizeof(a); + char *p = (char *)malloc(size); memcpy(p, a, size); free(p); return memcmp(p, a, size); diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index fd7c641ccf4b2..e194b96475481 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/StackSafetyAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" @@ -52,6 +53,7 @@ #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/MemoryTaggingSupport.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -320,7 +322,8 @@ class HWAddressSanitizer { LoopInfo *LI); bool ignoreAccess(Instruction *Inst, Value *Ptr); void 
getInterestingMemoryOperands( - Instruction *I, SmallVectorImpl &Interesting); + Instruction *I, const TargetLibraryInfo &TLI, + SmallVectorImpl &Interesting); void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size); Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag); @@ -779,7 +782,8 @@ bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { } void HWAddressSanitizer::getInterestingMemoryOperands( - Instruction *I, SmallVectorImpl &Interesting) { + Instruction *I, const TargetLibraryInfo &TLI, + SmallVectorImpl &Interesting) { // Skip memory accesses inserted by another instrumentation. if (I->hasMetadata(LLVMContext::MD_nosanitize)) return; @@ -817,6 +821,7 @@ void HWAddressSanitizer::getInterestingMemoryOperands( Type *Ty = CI->getParamByValType(ArgNo); Interesting.emplace_back(I, ArgNo, false, Ty, Align(1)); } + maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI); } } @@ -1493,6 +1498,7 @@ void HWAddressSanitizer::sanitizeFunction(Function &F, SmallVector OperandsToInstrument; SmallVector IntrinToInstrument; SmallVector LandingPadVec; + const TargetLibraryInfo &TLI = FAM.getResult(F); memtag::StackInfoBuilder SIB(SSI); for (auto &Inst : instructions(F)) { @@ -1503,7 +1509,7 @@ void HWAddressSanitizer::sanitizeFunction(Function &F, if (InstrumentLandingPads && isa(Inst)) LandingPadVec.push_back(&Inst); - getInterestingMemoryOperands(&Inst, OperandsToInstrument); + getInterestingMemoryOperands(&Inst, TLI, OperandsToInstrument); if (MemIntrinsic *MI = dyn_cast(&Inst)) if (!ignoreMemIntrinsic(MI)) diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/str-nobuiltin.ll b/llvm/test/Instrumentation/HWAddressSanitizer/str-nobuiltin.ll new file mode 100644 index 0000000000000..8faa906027386 --- /dev/null +++ b/llvm/test/Instrumentation/HWAddressSanitizer/str-nobuiltin.ll @@ -0,0 +1,33 @@ +; Test marking string functions as nobuiltin in address sanitizer. 
+; +; RUN: opt < %s -passes=hwasan -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +declare ptr @memchr(ptr %a, i32 %b, i64 %c) +declare i32 @memcmp(ptr %a, ptr %b, i64 %c) +declare i32 @strcmp(ptr %a, ptr %b) +declare ptr @strcpy(ptr %a, ptr %b) +declare ptr @stpcpy(ptr %a, ptr %b) +declare i64 @strlen(ptr %a) +declare i64 @strnlen(ptr %a, i64 %b) + +; CHECK: call{{.*}}@memchr{{.*}} #[[ATTR:[0-9]+]] +; CHECK: call{{.*}}@memcmp{{.*}} #[[ATTR]] +; CHECK: call{{.*}}@strcmp{{.*}} #[[ATTR]] +; CHECK: call{{.*}}@strcpy{{.*}} #[[ATTR]] +; CHECK: call{{.*}}@stpcpy{{.*}} #[[ATTR]] +; CHECK: call{{.*}}@strlen{{.*}} #[[ATTR]] +; CHECK: call{{.*}}@strnlen{{.*}} #[[ATTR]] +; attributes #[[ATTR]] = { nobuiltin } + +define void @f1(ptr %a, ptr %b) nounwind uwtable sanitize_hwaddress { + tail call ptr @memchr(ptr %a, i32 1, i64 12) + tail call i32 @memcmp(ptr %a, ptr %b, i64 12) + tail call i32 @strcmp(ptr %a, ptr %b) + tail call ptr @strcpy(ptr %a, ptr %b) + tail call ptr @stpcpy(ptr %a, ptr %b) + tail call i64 @strlen(ptr %a) + tail call i64 @strnlen(ptr %a, i64 12) + ret void +} From 36bb134ac79c91129d6ea551953ce67ed776123d Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 13 Oct 2023 11:53:57 -0700 Subject: [PATCH 096/720] [libc++] Use -nostdlib++ on GCC unconditionally (#68832) We support GCC 13, which supports the flag. This allows simplifying the CMake logic around the use of -nostdlib++. Note that there are other places where we don't assume -nostdlib++ yet in the build, but this patch is intentionally trying to be small because this part of our CMake is pretty tricky. 
--- libcxx/CMakeLists.txt | 32 ++------------------------------ libcxx/benchmarks/CMakeLists.txt | 2 +- libcxx/cmake/config-ix.cmake | 15 --------------- 3 files changed, 3 insertions(+), 46 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 16540caf68eaf..d03421afde1e7 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -642,18 +642,8 @@ get_sanitizer_flags(SANITIZER_FLAGS "${LLVM_USE_SANITIZER}") # Link system libraries ======================================================= function(cxx_link_system_libraries target) - -# In order to remove just libc++ from the link step -# we need to use -nostdlib++ whenever it is supported. -# Unfortunately this cannot be used universally because for example g++ supports -# only -nodefaultlibs in which case all libraries will be removed and -# all libraries but c++ have to be added in manually. - if (CXX_SUPPORTS_NOSTDLIBXX_FLAG) - target_add_link_flags_if_supported(${target} PRIVATE "-nostdlib++") - else() - target_add_link_flags_if_supported(${target} PRIVATE "-nodefaultlibs") - target_add_compile_flags_if_supported(${target} PRIVATE "/Zl") - target_add_link_flags_if_supported(${target} PRIVATE "/nodefaultlib") + if (NOT MSVC) + target_link_libraries(${target} PRIVATE "-nostdlib++") endif() if (CXX_SUPPORTS_UNWINDLIB_EQ_NONE_FLAG AND LIBCXXABI_USE_LLVM_UNWINDER) @@ -663,24 +653,6 @@ function(cxx_link_system_libraries target) target_add_link_flags_if_supported(${target} PRIVATE "--unwindlib=none") endif() - if (NOT APPLE) # On Apple platforms, we always use -nostdlib++ so we don't need to re-add other libraries - if (LIBCXX_HAS_PTHREAD_LIB) - target_link_libraries(${target} PRIVATE pthread) - endif() - - if (LIBCXX_HAS_C_LIB) - target_link_libraries(${target} PRIVATE c) - endif() - - if (LIBCXX_HAS_M_LIB) - target_link_libraries(${target} PRIVATE m) - endif() - - if (LIBCXX_HAS_RT_LIB) - target_link_libraries(${target} PRIVATE rt) - endif() - endif() - if (LIBCXX_USE_COMPILER_RT) 
find_compiler_rt_library(builtins LIBCXX_BUILTINS_LIBRARY) if (LIBCXX_BUILTINS_LIBRARY) diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index 1a4d634500180..80b2663fd8086 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -122,7 +122,7 @@ endif() add_library( cxx-benchmarks-flags-libcxx INTERFACE) target_link_libraries( cxx-benchmarks-flags-libcxx INTERFACE cxx-benchmarks-flags) target_compile_options(cxx-benchmarks-flags-libcxx INTERFACE ${SANITIZER_FLAGS} -Wno-user-defined-literals -Wno-suggest-override) -target_link_options( cxx-benchmarks-flags-libcxx INTERFACE -nodefaultlibs "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS}) +target_link_options( cxx-benchmarks-flags-libcxx INTERFACE -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS}) set(libcxx_benchmark_targets) diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake index 9962d848d85e8..9fed861a4e193 100644 --- a/libcxx/cmake/config-ix.cmake +++ b/libcxx/cmake/config-ix.cmake @@ -14,14 +14,6 @@ include(CheckCSourceCompiles) # link with --uwnindlib=none. Check if that option works. llvm_check_compiler_linker_flag(C "--unwindlib=none" CXX_SUPPORTS_UNWINDLIB_EQ_NONE_FLAG) -if(WIN32 AND NOT MINGW) - # NOTE(compnerd) this is technically a lie, there is msvcrt, but for now, lets - # let the default linking take care of that. 
- set(LIBCXX_HAS_C_LIB NO) -else() - check_library_exists(c fopen "" LIBCXX_HAS_C_LIB) -endif() - if (NOT LIBCXX_USE_COMPILER_RT) if(WIN32 AND NOT MINGW) set(LIBCXX_HAS_GCC_S_LIB NO) @@ -54,9 +46,6 @@ else() endif() if (CXX_SUPPORTS_NOSTDLIBXX_FLAG OR C_SUPPORTS_NODEFAULTLIBS_FLAG) - if (LIBCXX_HAS_C_LIB) - list(APPEND CMAKE_REQUIRED_LIBRARIES c) - endif () if (LIBCXX_USE_COMPILER_RT) include(HandleCompilerRT) find_compiler_rt_library(builtins LIBCXX_BUILTINS_LIBRARY @@ -108,22 +97,18 @@ if(WIN32 AND NOT MINGW) # TODO(compnerd) do we want to support an emulation layer that allows for the # use of pthread-win32 or similar libraries to emulate pthreads on Windows? set(LIBCXX_HAS_PTHREAD_LIB NO) - set(LIBCXX_HAS_M_LIB NO) set(LIBCXX_HAS_RT_LIB NO) set(LIBCXX_HAS_ATOMIC_LIB NO) elseif(APPLE) set(LIBCXX_HAS_PTHREAD_LIB NO) - set(LIBCXX_HAS_M_LIB NO) set(LIBCXX_HAS_RT_LIB NO) set(LIBCXX_HAS_ATOMIC_LIB NO) elseif(FUCHSIA) - set(LIBCXX_HAS_M_LIB NO) set(LIBCXX_HAS_PTHREAD_LIB NO) set(LIBCXX_HAS_RT_LIB NO) check_library_exists(atomic __atomic_fetch_add_8 "" LIBCXX_HAS_ATOMIC_LIB) else() check_library_exists(pthread pthread_create "" LIBCXX_HAS_PTHREAD_LIB) - check_library_exists(m ccos "" LIBCXX_HAS_M_LIB) check_library_exists(rt clock_gettime "" LIBCXX_HAS_RT_LIB) check_library_exists(atomic __atomic_fetch_add_8 "" LIBCXX_HAS_ATOMIC_LIB) endif() From 1bc48716957e2856116c310ed963365574d5cfe2 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 13 Oct 2023 11:57:45 -0700 Subject: [PATCH 097/720] [libc++][NFC] Fix typo in filename --- .../deprecated.verify.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename libcxx/test/std/depr/{depr.numeric.imits.has.denorm => depr.numeric.limits.has.denorm}/deprecated.verify.cpp (100%) diff --git a/libcxx/test/std/depr/depr.numeric.imits.has.denorm/deprecated.verify.cpp b/libcxx/test/std/depr/depr.numeric.limits.has.denorm/deprecated.verify.cpp similarity index 100% rename from 
libcxx/test/std/depr/depr.numeric.imits.has.denorm/deprecated.verify.cpp rename to libcxx/test/std/depr/depr.numeric.limits.has.denorm/deprecated.verify.cpp From a8896e57f150abf57b4e70ba1f6bfbd4c2d24ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Sch=C3=A4pers?= Date: Fri, 13 Oct 2023 21:07:33 +0200 Subject: [PATCH 098/720] [clang-format][NFC] Annotate control statement r_braces (#68621) Annotating switch braces for the first time. Also in preparation of #67906. --- clang/lib/Format/FormatToken.h | 2 ++ clang/lib/Format/UnwrappedLineParser.cpp | 26 ++++++++++------ clang/lib/Format/UnwrappedLineParser.h | 1 + clang/unittests/Format/TokenAnnotatorTest.cpp | 31 +++++++++++++++++++ 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 527f1d744a580..606e9e790ad83 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -52,6 +52,7 @@ namespace format { TYPE(ConflictStart) \ /* l_brace of if/for/while */ \ TYPE(ControlStatementLBrace) \ + TYPE(ControlStatementRBrace) \ TYPE(CppCastLParen) \ TYPE(CSharpGenericTypeConstraint) \ TYPE(CSharpGenericTypeConstraintColon) \ @@ -67,6 +68,7 @@ namespace format { TYPE(DesignatedInitializerPeriod) \ TYPE(DictLiteral) \ TYPE(ElseLBrace) \ + TYPE(ElseRBrace) \ TYPE(EnumLBrace) \ TYPE(EnumRBrace) \ TYPE(FatArrow) \ diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 3275d7b6a71aa..82a812fc8bcc6 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -640,6 +640,14 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { FormatTok = Tokens->setPosition(StoredPosition); } +// Sets the token type of the directly previous right brace. 
+void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) { + if (auto Prev = FormatTok->getPreviousNonComment(); + Prev && Prev->is(tok::r_brace)) { + Prev->setFinalizedType(Type); + } +} + template static inline void hash_combine(std::size_t &seed, const T &v) { std::hash hasher; @@ -2756,6 +2764,7 @@ FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); + setPreviousRBraceType(TT_ControlStatementRBrace); if (Style.BraceWrapping.BeforeElse) addUnwrappedLine(); else @@ -2794,6 +2803,7 @@ FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, FormatToken *IfLBrace = parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); + setPreviousRBraceType(TT_ElseRBrace); if (FormatTok->is(tok::kw_else)) { KeepElseBraces = KeepElseBraces || ElseBlockKind == IfStmtKind::IfOnly || @@ -3057,12 +3067,12 @@ void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { keepAncestorBraces(); if (isBlockBegin(*FormatTok)) { - if (!KeepBraces) - FormatTok->setFinalizedType(TT_ControlStatementLBrace); + FormatTok->setFinalizedType(TT_ControlStatementLBrace); FormatToken *LeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/true, KeepBraces); + setPreviousRBraceType(TT_ControlStatementRBrace); if (!KeepBraces) { assert(!NestedTooDeep.empty()); if (!NestedTooDeep.back()) @@ -3196,7 +3206,9 @@ void UnwrappedLineParser::parseSwitch() { if (FormatTok->is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); + FormatTok->setFinalizedType(TT_ControlStatementLBrace); parseBlock(); + setPreviousRBraceType(TT_ControlStatementRBrace); addUnwrappedLine(); } else { addUnwrappedLine(); @@ -3713,10 
+3725,7 @@ bool UnwrappedLineParser::parseEnum() { nextToken(); addUnwrappedLine(); } - if (auto Prev = FormatTok->getPreviousNonComment(); - Prev && Prev->is(tok::r_brace)) { - Prev->setFinalizedType(TT_EnumRBrace); - } + setPreviousRBraceType(TT_EnumRBrace); return true; // There is no addUnwrappedLine() here so that we fall through to parsing a @@ -3950,10 +3959,7 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); } - if (auto Prev = FormatTok->getPreviousNonComment(); - Prev && Prev->is(tok::r_brace)) { - Prev->setFinalizedType(ClosingBraceType); - } + setPreviousRBraceType(ClosingBraceType); } // There is no addUnwrappedLine() here so that we fall through to parsing a // structural element afterwards. Thus, in "class A {} n, m;", diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index a4f150d195712..c31f25fdd8f83 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -243,6 +243,7 @@ class UnwrappedLineParser { void flushComments(bool NewlineBeforeNext); void pushToken(FormatToken *Tok); void calculateBraceTypes(bool ExpectClassBody = false); + void setPreviousRBraceType(TokenType Type); // Marks a conditional compilation edge (for example, an '#if', '#ifdef', // '#else' or merge conflict marker). 
If 'Unreachable' is true, assumes diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 2d590f2af05e6..b6d4cf166de02 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2151,6 +2151,37 @@ TEST_F(TokenAnnotatorTest, UnderstandsAttributes) { EXPECT_TOKEN(Tokens[5], tok::r_paren, TT_AttributeRParen); } +TEST_F(TokenAnnotatorTest, UnderstandsControlStatements) { + auto Tokens = annotate("while (true) {}"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_ControlStatementLBrace); + EXPECT_TOKEN(Tokens[5], tok::r_brace, TT_ControlStatementRBrace); + + Tokens = annotate("for (;;) {}"); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::l_brace, TT_ControlStatementLBrace); + EXPECT_TOKEN(Tokens[6], tok::r_brace, TT_ControlStatementRBrace); + + Tokens = annotate("do {} while (true);"); + ASSERT_EQ(Tokens.size(), 9u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::l_brace, TT_ControlStatementLBrace); + EXPECT_TOKEN(Tokens[2], tok::r_brace, TT_ControlStatementRBrace); + + Tokens = annotate("if (true) {} else if (false) {} else {}"); + ASSERT_EQ(Tokens.size(), 17u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_ControlStatementLBrace); + EXPECT_TOKEN(Tokens[5], tok::r_brace, TT_ControlStatementRBrace); + EXPECT_TOKEN(Tokens[11], tok::l_brace, TT_ControlStatementLBrace); + EXPECT_TOKEN(Tokens[12], tok::r_brace, TT_ControlStatementRBrace); + EXPECT_TOKEN(Tokens[14], tok::l_brace, TT_ElseLBrace); + EXPECT_TOKEN(Tokens[15], tok::r_brace, TT_ElseRBrace); + + Tokens = annotate("switch (foo) {}"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_ControlStatementLBrace); + EXPECT_TOKEN(Tokens[5], tok::r_brace, TT_ControlStatementRBrace); +} + } // namespace } // namespace format } // namespace clang From 0dfcfb53d7bba22b3a5d36853837d5889b32a744 Mon Sep 17 00:00:00 2001 
From: Michael Buch Date: Fri, 13 Oct 2023 20:07:41 +0100 Subject: [PATCH 099/720] [lldb][DataFormatter] VectorType: fix format for arrays with size not a power-of-2 (#68907) To get the number of children for a VectorType (i.e., a type declared with a `vector_size`/`ext_vector_type` attribute) LLDB previously did the following calculation: 1. Get byte-size of the vector container from Clang (`getTypeInfo`). 2. Get byte-size of the element type we want to interpret the array as. (e.g., sometimes we want to interpret an `unsigned char vec[16]` as a `float32[]`). 3. `numChildren = containerSize / reinterpretedElementSize` However, for step 1, clang will return us the *aligned* container byte-size. So for a type such as `float __attribute__((ext_vector_type(3)))` (which is an array of 3 4-byte floats), clang will round up the byte-width of the array to `16`. (see [here](https://github.com/llvm/llvm-project/blob/ab6a66dbec61654d0962f6abf6d6c5b776937584/clang/lib/AST/ASTContext.cpp#L1987-L1992)) This means that for vectors where the size isn't a power-of-2, LLDB will miscalculate the number of elements. **Solution** This patch changes step 1 such that we calculate the container size as `numElementsInSource * byteSizeOfElement`.
--- lldb/source/DataFormatters/VectorType.cpp | 65 ++++++++++++++----- .../vector-types/TestVectorTypesFormatting.py | 7 ++ .../data-formatter/vector-types/main.cpp | 4 +- 3 files changed, 58 insertions(+), 18 deletions(-) diff --git a/lldb/source/DataFormatters/VectorType.cpp b/lldb/source/DataFormatters/VectorType.cpp index 4afcfa2e8e490..57dae0b2c71f0 100644 --- a/lldb/source/DataFormatters/VectorType.cpp +++ b/lldb/source/DataFormatters/VectorType.cpp @@ -169,21 +169,49 @@ static lldb::Format GetItemFormatForFormat(lldb::Format format, } } -static size_t CalculateNumChildren( - CompilerType container_type, CompilerType element_type, - lldb_private::ExecutionContextScope *exe_scope = - nullptr // does not matter here because all we trade in are basic types - ) { - std::optional container_size = - container_type.GetByteSize(exe_scope); - std::optional element_size = element_type.GetByteSize(exe_scope); - - if (container_size && element_size && *element_size) { - if (*container_size % *element_size) - return 0; - return *container_size / *element_size; - } - return 0; +/// Calculates the number of elements stored in a container (with +/// element type 'container_elem_type') as if it had elements of type +/// 'element_type'. +/// +/// For example, a container of type +/// `uint8_t __attribute__((vector_size(16)))` has 16 elements. +/// But calling `CalculateNumChildren` with an 'element_type' +/// of `float` (4-bytes) will return `4` because we are interpreting +/// the byte-array as a `float32[]`. +/// +/// \param[in] container_elem_type The type of the elements stored +/// in the container we are calculating the children of. +/// +/// \param[in] num_elements Number of 'container_elem_type's our +/// container stores. +/// +/// \param[in] element_type The type of elements we interpret +/// container_type to contain for the purposes of calculating +/// the number of children. 
+/// +/// \returns The number of elements stored in a container of +/// type 'element_type'. Returns a std::nullopt if the +/// size of the container is not a multiple of 'element_type' +/// or if an error occurs. +static std::optional +CalculateNumChildren(CompilerType container_elem_type, uint64_t num_elements, + CompilerType element_type) { + std::optional container_elem_size = + container_elem_type.GetByteSize(/* exe_scope */ nullptr); + if (!container_elem_size) + return {}; + + auto container_size = *container_elem_size * num_elements; + + std::optional element_size = + element_type.GetByteSize(/* exe_scope */ nullptr); + if (!element_size || !*element_size) + return {}; + + if (container_size % *element_size) + return {}; + + return container_size / *element_size; } namespace lldb_private { @@ -221,11 +249,14 @@ class VectorTypeSyntheticFrontEnd : public SyntheticChildrenFrontEnd { m_parent_format = m_backend.GetFormat(); CompilerType parent_type(m_backend.GetCompilerType()); CompilerType element_type; - parent_type.IsVectorType(&element_type); + uint64_t num_elements; + parent_type.IsVectorType(&element_type, &num_elements); m_child_type = ::GetCompilerTypeForFormat( m_parent_format, element_type, parent_type.GetTypeSystem().GetSharedPointer()); - m_num_children = ::CalculateNumChildren(parent_type, m_child_type); + m_num_children = + ::CalculateNumChildren(element_type, num_elements, m_child_type) + .value_or(0); m_item_format = GetItemFormatForFormat(m_parent_format, m_child_type); return false; } diff --git a/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py b/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py index 4103d62878c70..1839c28aeb29f 100644 --- a/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py +++ b/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py @@ -86,3 +86,10 @@ def cleanup(): 
v.SetFormat(lldb.eFormatVectorOfFloat32) oldValueAgain = v.GetChildAtIndex(0).GetValue() self.assertEqual(oldValue, oldValueAgain, "same format but different values") + + # Test formatter for vector types whose size is not a power-of-2 + f3 = self.frame().FindVariable("f3") + self.assertEqual(f3.GetNumChildren(), 3) + self.assertEqual(f3.GetChildAtIndex(0).GetData().float[0], 1.25) + self.assertEqual(f3.GetChildAtIndex(1).GetData().float[0], 2.50) + self.assertEqual(f3.GetChildAtIndex(2).GetData().float[0], 2.50) diff --git a/lldb/test/API/functionalities/data-formatter/vector-types/main.cpp b/lldb/test/API/functionalities/data-formatter/vector-types/main.cpp index ef0a227560bc2..7f2309e776bc2 100644 --- a/lldb/test/API/functionalities/data-formatter/vector-types/main.cpp +++ b/lldb/test/API/functionalities/data-formatter/vector-types/main.cpp @@ -1,8 +1,10 @@ typedef float float4 __attribute__((ext_vector_type(4))); -typedef unsigned char vec __attribute__((ext_vector_type(16))); +typedef unsigned char vec __attribute__((ext_vector_type(16))); +typedef float float3 __attribute__((ext_vector_type(3))); int main() { float4 f4 = {1.25, 1.25, 2.50, 2.50}; vec v = (vec)f4; + float3 f3 = f4.gba; return 0; // break here } From b1115f8ccefb380824a9d997622cc84fc0d84a89 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 13 Oct 2023 20:08:30 +0100 Subject: [PATCH 100/720] [LV] Use LatchVPBB directly instead of going through region (NFC). Split off from D158333. 
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 88f064b6d57ce..2ca7e75f97f0f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8918,8 +8918,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { // --------------------------------------------------------------------------- // Adjust the recipes for any inloop reductions. - adjustRecipesForReductions(cast(TopRegion->getExiting()), Plan, - RecipeBuilder, Range.Start); + adjustRecipesForReductions(LatchVPBB, Plan, RecipeBuilder, Range.Start); // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a From 6dbc6dfe79b33a3bb18cb9fff16d3392597707b8 Mon Sep 17 00:00:00 2001 From: Han-Chung Wang Date: Fri, 13 Oct 2023 12:16:29 -0700 Subject: [PATCH 101/720] Reland "[mlir][arith] Canonicalization patterns for `arith.select` (#67809)" (#68941) This cherry-picks the changes in llvm-project/5bf701a6687a46fd898621f5077959ff202d716b and extends the pattern to handle vector types. To reuse `getBoolAttribute` method, it moves the static method above the include of generated file. 
--- .../Dialect/Arith/IR/ArithCanonicalization.td | 49 +++++++++ mlir/lib/Dialect/Arith/IR/ArithOps.cpp | 22 ++-- mlir/test/Dialect/Arith/canonicalize.mlir | 100 ++++++++++++++++++ 3 files changed, 161 insertions(+), 10 deletions(-) diff --git a/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td b/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td index f3d84d0b261e8..ef951647ccd14 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td +++ b/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td @@ -233,6 +233,55 @@ def CmpIExtUI : CPred<"$0.getValue() == arith::CmpIPredicate::eq || " "$0.getValue() == arith::CmpIPredicate::ne">> $pred)]>; +//===----------------------------------------------------------------------===// +// SelectOp +//===----------------------------------------------------------------------===// + +def GetScalarOrVectorTrueAttribute : + NativeCodeCall<"cast(getBoolAttribute($0.getType(), true))">; + +// select(not(pred), a, b) => select(pred, b, a) +def SelectNotCond : + Pat<(SelectOp (Arith_XOrIOp $pred, (ConstantLikeMatcher APIntAttr:$ones)), $a, $b), + (SelectOp $pred, $b, $a), + [(IsScalarOrSplatNegativeOne $ones)]>; + +// select(pred, select(pred, a, b), c) => select(pred, a, c) +def RedundantSelectTrue : + Pat<(SelectOp $pred, (SelectOp $pred, $a, $b), $c), + (SelectOp $pred, $a, $c)>; + +// select(pred, a, select(pred, b, c)) => select(pred, a, c) +def RedundantSelectFalse : + Pat<(SelectOp $pred, $a, (SelectOp $pred, $b, $c)), + (SelectOp $pred, $a, $c)>; + +// select(predA, select(predB, x, y), y) => select(and(predA, predB), x, y) +def SelectAndCond : + Pat<(SelectOp $predA, (SelectOp $predB, $x, $y), $y), + (SelectOp (Arith_AndIOp $predA, $predB), $x, $y)>; + +// select(predA, select(predB, y, x), y) => select(and(predA, not(predB)), x, y) +def SelectAndNotCond : + Pat<(SelectOp $predA, (SelectOp $predB, $y, $x), $y), + (SelectOp (Arith_AndIOp $predA, + (Arith_XOrIOp $predB, + (Arith_ConstantOp (GetScalarOrVectorTrueAttribute 
$predB)))), + $x, $y)>; + +// select(predA, x, select(predB, x, y)) => select(or(predA, predB), x, y) +def SelectOrCond : + Pat<(SelectOp $predA, $x, (SelectOp $predB, $x, $y)), + (SelectOp (Arith_OrIOp $predA, $predB), $x, $y)>; + +// select(predA, x, select(predB, y, x)) => select(or(predA, not(predB)), x, y) +def SelectOrNotCond : + Pat<(SelectOp $predA, $x, (SelectOp $predB, $y, $x)), + (SelectOp (Arith_OrIOp $predA, + (Arith_XOrIOp $predB, + (Arith_ConstantOp (GetScalarOrVectorTrueAttribute $predB)))), + $x, $y)>; + //===----------------------------------------------------------------------===// // IndexCastOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 3892e8fa0a32f..1002719f0b89f 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -113,6 +113,14 @@ static FailureOr getIntOrSplatIntValue(Attribute attr) { return failure(); } +static Attribute getBoolAttribute(Type type, bool value) { + auto boolAttr = BoolAttr::get(type.getContext(), value); + ShapedType shapedType = llvm::dyn_cast_or_null(type); + if (!shapedType) + return boolAttr; + return DenseElementsAttr::get(shapedType, boolAttr); +} + //===----------------------------------------------------------------------===// // TableGen'd canonicalization patterns //===----------------------------------------------------------------------===// @@ -1696,14 +1704,6 @@ static bool applyCmpPredicateToEqualOperands(arith::CmpIPredicate predicate) { llvm_unreachable("unknown cmpi predicate kind"); } -static Attribute getBoolAttribute(Type type, MLIRContext *ctx, bool value) { - auto boolAttr = BoolAttr::get(ctx, value); - ShapedType shapedType = llvm::dyn_cast_or_null(type); - if (!shapedType) - return boolAttr; - return DenseElementsAttr::get(shapedType, boolAttr); -} - static std::optional getIntegerWidth(Type t) { if (auto intType = 
llvm::dyn_cast(t)) { return intType.getWidth(); @@ -1718,7 +1718,7 @@ OpFoldResult arith::CmpIOp::fold(FoldAdaptor adaptor) { // cmpi(pred, x, x) if (getLhs() == getRhs()) { auto val = applyCmpPredicateToEqualOperands(getPredicate()); - return getBoolAttribute(getType(), getContext(), val); + return getBoolAttribute(getType(), val); } if (matchPattern(adaptor.getRhs(), m_Zero())) { @@ -2212,7 +2212,9 @@ struct SelectToExtUI : public OpRewritePattern { void arith::SelectOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + results.add(context); } OpFoldResult arith::SelectOp::fold(FoldAdaptor adaptor) { diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index 5e4476a21df04..10050d87d7568 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -128,6 +128,106 @@ func.func @selToArith(%arg0: i1, %arg1 : i1, %arg2 : i1) -> i1 { return %res : i1 } +// CHECK-LABEL: @redundantSelectTrue +// CHECK-NEXT: %[[res:.+]] = arith.select %arg0, %arg1, %arg3 +// CHECK-NEXT: return %[[res]] +func.func @redundantSelectTrue(%arg0: i1, %arg1 : i32, %arg2 : i32, %arg3 : i32) -> i32 { + %0 = arith.select %arg0, %arg1, %arg2 : i32 + %res = arith.select %arg0, %0, %arg3 : i32 + return %res : i32 +} + +// CHECK-LABEL: @redundantSelectFalse +// CHECK-NEXT: %[[res:.+]] = arith.select %arg0, %arg3, %arg2 +// CHECK-NEXT: return %[[res]] +func.func @redundantSelectFalse(%arg0: i1, %arg1 : i32, %arg2 : i32, %arg3 : i32) -> i32 { + %0 = arith.select %arg0, %arg1, %arg2 : i32 + %res = arith.select %arg0, %arg3, %0 : i32 + return %res : i32 +} + +// CHECK-LABEL: @selNotCond +// CHECK-NEXT: %[[res1:.+]] = arith.select %arg0, %arg2, %arg1 +// CHECK-NEXT: %[[res2:.+]] = arith.select %arg0, %arg4, %arg3 +// CHECK-NEXT: return %[[res1]], %[[res2]] +func.func @selNotCond(%arg0: i1, %arg1 : i32, %arg2 : i32, %arg3 : i32, %arg4 : i32) -> (i32, i32) { + 
%one = arith.constant 1 : i1 + %cond1 = arith.xori %arg0, %one : i1 + %cond2 = arith.xori %one, %arg0 : i1 + + %res1 = arith.select %cond1, %arg1, %arg2 : i32 + %res2 = arith.select %cond2, %arg3, %arg4 : i32 + return %res1, %res2 : i32, i32 +} + +// CHECK-LABEL: @selAndCond +// CHECK-NEXT: %[[and:.+]] = arith.andi %arg1, %arg0 +// CHECK-NEXT: %[[res:.+]] = arith.select %[[and]], %arg2, %arg3 +// CHECK-NEXT: return %[[res]] +func.func @selAndCond(%arg0: i1, %arg1: i1, %arg2 : i32, %arg3 : i32) -> i32 { + %sel = arith.select %arg0, %arg2, %arg3 : i32 + %res = arith.select %arg1, %sel, %arg3 : i32 + return %res : i32 +} + +// CHECK-LABEL: @selAndNotCond +// CHECK-NEXT: %[[one:.+]] = arith.constant true +// CHECK-NEXT: %[[not:.+]] = arith.xori %arg0, %[[one]] +// CHECK-NEXT: %[[and:.+]] = arith.andi %arg1, %[[not]] +// CHECK-NEXT: %[[res:.+]] = arith.select %[[and]], %arg3, %arg2 +// CHECK-NEXT: return %[[res]] +func.func @selAndNotCond(%arg0: i1, %arg1: i1, %arg2 : i32, %arg3 : i32) -> i32 { + %sel = arith.select %arg0, %arg2, %arg3 : i32 + %res = arith.select %arg1, %sel, %arg2 : i32 + return %res : i32 +} + +// CHECK-LABEL: @selAndNotCondVec +// CHECK-NEXT: %[[one:.+]] = arith.constant dense : vector<4xi1> +// CHECK-NEXT: %[[not:.+]] = arith.xori %arg0, %[[one]] +// CHECK-NEXT: %[[and:.+]] = arith.andi %arg1, %[[not]] +// CHECK-NEXT: %[[res:.+]] = arith.select %[[and]], %arg3, %arg2 +// CHECK-NEXT: return %[[res]] +func.func @selAndNotCondVec(%arg0: vector<4xi1>, %arg1: vector<4xi1>, %arg2 : vector<4xi32>, %arg3 : vector<4xi32>) -> vector<4xi32> { + %sel = arith.select %arg0, %arg2, %arg3 : vector<4xi1>, vector<4xi32> + %res = arith.select %arg1, %sel, %arg2 : vector<4xi1>, vector<4xi32> + return %res : vector<4xi32> +} + +// CHECK-LABEL: @selOrCond +// CHECK-NEXT: %[[or:.+]] = arith.ori %arg1, %arg0 +// CHECK-NEXT: %[[res:.+]] = arith.select %[[or]], %arg2, %arg3 +// CHECK-NEXT: return %[[res]] +func.func @selOrCond(%arg0: i1, %arg1: i1, %arg2 : i32, %arg3 : i32) 
-> i32 { + %sel = arith.select %arg0, %arg2, %arg3 : i32 + %res = arith.select %arg1, %arg2, %sel : i32 + return %res : i32 +} + +// CHECK-LABEL: @selOrNotCond +// CHECK-NEXT: %[[one:.+]] = arith.constant true +// CHECK-NEXT: %[[not:.+]] = arith.xori %arg0, %[[one]] +// CHECK-NEXT: %[[or:.+]] = arith.ori %arg1, %[[not]] +// CHECK-NEXT: %[[res:.+]] = arith.select %[[or]], %arg3, %arg2 +// CHECK-NEXT: return %[[res]] +func.func @selOrNotCond(%arg0: i1, %arg1: i1, %arg2 : i32, %arg3 : i32) -> i32 { + %sel = arith.select %arg0, %arg2, %arg3 : i32 + %res = arith.select %arg1, %arg3, %sel : i32 + return %res : i32 +} + +// CHECK-LABEL: @selOrNotCondVec +// CHECK-NEXT: %[[one:.+]] = arith.constant dense : vector<4xi1> +// CHECK-NEXT: %[[not:.+]] = arith.xori %arg0, %[[one]] +// CHECK-NEXT: %[[or:.+]] = arith.ori %arg1, %[[not]] +// CHECK-NEXT: %[[res:.+]] = arith.select %[[or]], %arg3, %arg2 +// CHECK-NEXT: return %[[res]] +func.func @selOrNotCondVec(%arg0: vector<4xi1>, %arg1: vector<4xi1>, %arg2 : vector<4xi32>, %arg3 : vector<4xi32>) -> vector<4xi32> { + %sel = arith.select %arg0, %arg2, %arg3 : vector<4xi1>, vector<4xi32> + %res = arith.select %arg1, %arg3, %sel : vector<4xi1>, vector<4xi32> + return %res : vector<4xi32> +} + // Test case: Folding of comparisons with equal operands. // CHECK-LABEL: @cmpi_equal_operands // CHECK-DAG: %[[T:.*]] = arith.constant true From 99d92d18e334d776db4bca7cc45d015e2d14cfe0 Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Fri, 13 Oct 2023 12:19:11 -0700 Subject: [PATCH 102/720] [scudo] Add specific die functions for linux specific failures. (#68650) While running into failures on unmap calls, it becomes difficult to figure out what's wrong. Break the dieOnMapUnmapError into specific versions for map, unmap, and then one for mprotect. Also, put these in a common linux space so that all linux derived code can reuse this code. 
--- .../lib/scudo/standalone/CMakeLists.txt | 2 + compiler-rt/lib/scudo/standalone/common.cpp | 14 ----- compiler-rt/lib/scudo/standalone/common.h | 4 -- compiler-rt/lib/scudo/standalone/linux.cpp | 7 ++- .../lib/scudo/standalone/mem_map_linux.cpp | 11 ++-- compiler-rt/lib/scudo/standalone/report.cpp | 13 +++-- compiler-rt/lib/scudo/standalone/report.h | 5 +- .../lib/scudo/standalone/report_linux.cpp | 58 +++++++++++++++++++ .../lib/scudo/standalone/report_linux.h | 34 +++++++++++ compiler-rt/lib/scudo/standalone/trusty.cpp | 5 +- 10 files changed, 119 insertions(+), 34 deletions(-) create mode 100644 compiler-rt/lib/scudo/standalone/report_linux.cpp create mode 100644 compiler-rt/lib/scudo/standalone/report_linux.h diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt index c4d3ea1e4f05b..ba699f6a67c67 100644 --- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt +++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt @@ -84,6 +84,7 @@ set(SCUDO_HEADERS quarantine.h release.h report.h + report_linux.h secondary.h size_class_map.h stack_depot.h @@ -113,6 +114,7 @@ set(SCUDO_SOURCES mem_map_linux.cpp release.cpp report.cpp + report_linux.cpp string_utils.cpp timing.cpp ) diff --git a/compiler-rt/lib/scudo/standalone/common.cpp b/compiler-rt/lib/scudo/standalone/common.cpp index 666f95400c7e7..06e930638f6f9 100644 --- a/compiler-rt/lib/scudo/standalone/common.cpp +++ b/compiler-rt/lib/scudo/standalone/common.cpp @@ -21,18 +21,4 @@ uptr getPageSizeSlow() { return PageSizeCached; } -// Fatal internal map() or unmap() error (potentially OOM related). 
-void NORETURN dieOnMapUnmapError(uptr SizeIfOOM) { - char Error[128] = "Scudo ERROR: internal map or unmap failure\n"; - if (SizeIfOOM) { - formatString( - Error, sizeof(Error), - "Scudo ERROR: internal map failure (NO MEMORY) requesting %zuKB\n", - SizeIfOOM >> 10); - } - outputRaw(Error); - setAbortMessage(Error); - die(); -} - } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h index d0f429cfcb7a0..3581c946d1608 100644 --- a/compiler-rt/lib/scudo/standalone/common.h +++ b/compiler-rt/lib/scudo/standalone/common.h @@ -175,10 +175,6 @@ void setMemoryPermission(uptr Addr, uptr Size, uptr Flags, void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size, MapPlatformData *Data = nullptr); -// Internal map & unmap fatal error. This must not call map(). SizeIfOOM shall -// hold the requested size on an out-of-memory error, 0 otherwise. -void NORETURN dieOnMapUnmapError(uptr SizeIfOOM = 0); - // Logging related functions. void setAbortMessage(const char *Message); diff --git a/compiler-rt/lib/scudo/standalone/linux.cpp b/compiler-rt/lib/scudo/standalone/linux.cpp index c31c3d2483a97..2746951081098 100644 --- a/compiler-rt/lib/scudo/standalone/linux.cpp +++ b/compiler-rt/lib/scudo/standalone/linux.cpp @@ -14,6 +14,7 @@ #include "internal_defs.h" #include "linux.h" #include "mutex.h" +#include "report_linux.h" #include "string_utils.h" #include @@ -66,7 +67,7 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, void *P = mmap(Addr, Size, MmapProt, MmapFlags, -1, 0); if (P == MAP_FAILED) { if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) - dieOnMapUnmapError(errno == ENOMEM ? Size : 0); + reportMapError(errno == ENOMEM ? 
Size : 0); return nullptr; } #if SCUDO_ANDROID @@ -80,7 +81,7 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, void unmap(void *Addr, uptr Size, UNUSED uptr Flags, UNUSED MapPlatformData *Data) { if (munmap(Addr, Size) != 0) - dieOnMapUnmapError(); + reportUnmapError(reinterpret_cast(Addr), Size); } // TODO: Will be deprecated. Use the interfaces in MemMapLinux instead. @@ -88,7 +89,7 @@ void setMemoryPermission(uptr Addr, uptr Size, uptr Flags, UNUSED MapPlatformData *Data) { int Prot = (Flags & MAP_NOACCESS) ? PROT_NONE : (PROT_READ | PROT_WRITE); if (mprotect(reinterpret_cast(Addr), Size, Prot) != 0) - dieOnMapUnmapError(); + reportProtectError(Addr, Size, Prot); } // TODO: Will be deprecated. Use the interfaces in MemMapLinux instead. diff --git a/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp b/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp index f377d105894db..783c4f0d9ab0f 100644 --- a/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp +++ b/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp @@ -16,6 +16,7 @@ #include "internal_defs.h" #include "linux.h" #include "mutex.h" +#include "report_linux.h" #include "string_utils.h" #include @@ -64,7 +65,7 @@ static void *mmapWrapper(uptr Addr, uptr Size, const char *Name, uptr Flags) { mmap(reinterpret_cast(Addr), Size, MmapProt, MmapFlags, -1, 0); if (P == MAP_FAILED) { if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) - dieOnMapUnmapError(errno == ENOMEM ? Size : 0); + reportMapError(errno == ENOMEM ? 
Size : 0); return nullptr; } #if SCUDO_ANDROID @@ -101,21 +102,21 @@ void MemMapLinux::unmapImpl(uptr Addr, uptr Size) { } if (munmap(reinterpret_cast(Addr), Size) != 0) - dieOnMapUnmapError(); + reportUnmapError(Addr, Size); } bool MemMapLinux::remapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags) { void *P = mmapWrapper(Addr, Size, Name, Flags); if (reinterpret_cast(P) != Addr) - dieOnMapUnmapError(); + reportMapError(); return true; } void MemMapLinux::setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags) { int Prot = (Flags & MAP_NOACCESS) ? PROT_NONE : (PROT_READ | PROT_WRITE); if (mprotect(reinterpret_cast(Addr), Size, Prot) != 0) - dieOnMapUnmapError(); + reportProtectError(Addr, Size, Prot); } void MemMapLinux::releaseAndZeroPagesToOSImpl(uptr From, uptr Size) { @@ -139,7 +140,7 @@ bool ReservedMemoryLinux::createImpl(uptr Addr, uptr Size, const char *Name, void ReservedMemoryLinux::releaseImpl() { if (munmap(reinterpret_cast(getBase()), getCapacity()) != 0) - dieOnMapUnmapError(); + reportUnmapError(getBase(), getCapacity()); } ReservedMemoryLinux::MemMapT ReservedMemoryLinux::dispatchImpl(uptr Addr, diff --git a/compiler-rt/lib/scudo/standalone/report.cpp b/compiler-rt/lib/scudo/standalone/report.cpp index c033949a85f4b..9cef0adc0bb31 100644 --- a/compiler-rt/lib/scudo/standalone/report.cpp +++ b/compiler-rt/lib/scudo/standalone/report.cpp @@ -24,11 +24,7 @@ class ScopedErrorReport { Message.vappend(Format, Args); va_end(Args); } - NORETURN ~ScopedErrorReport() { - outputRaw(Message.data()); - setAbortMessage(Message.data()); - die(); - } + NORETURN ~ScopedErrorReport() { reportRawError(Message.data()); } private: ScopedString Message; @@ -55,6 +51,13 @@ void NORETURN reportError(const char *Message) { Report.append("%s\n", Message); } +// Generic fatal error message without ScopedString. 
+void NORETURN reportRawError(const char *Message) { + outputRaw(Message); + setAbortMessage(Message); + die(); +} + void NORETURN reportInvalidFlag(const char *FlagType, const char *Value) { ScopedErrorReport Report; Report.append("invalid value for %s option: '%s'\n", FlagType, Value); diff --git a/compiler-rt/lib/scudo/standalone/report.h b/compiler-rt/lib/scudo/standalone/report.h index d8c2dea994c16..a510fdaebb6de 100644 --- a/compiler-rt/lib/scudo/standalone/report.h +++ b/compiler-rt/lib/scudo/standalone/report.h @@ -15,9 +15,12 @@ namespace scudo { // Reports are *fatal* unless stated otherwise. -// Generic error. +// Generic error, adds newline to end of message. void NORETURN reportError(const char *Message); +// Generic error, but the message is not modified. +void NORETURN reportRawError(const char *Message); + // Flags related errors. void NORETURN reportInvalidFlag(const char *FlagType, const char *Value); diff --git a/compiler-rt/lib/scudo/standalone/report_linux.cpp b/compiler-rt/lib/scudo/standalone/report_linux.cpp new file mode 100644 index 0000000000000..6a983036e6cd9 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/report_linux.cpp @@ -0,0 +1,58 @@ +//===-- report_linux.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.h" + +#if SCUDO_LINUX || SCUDO_TRUSTY + +#include "common.h" +#include "internal_defs.h" +#include "report.h" +#include "report_linux.h" +#include "string_utils.h" + +#include +#include +#include + +namespace scudo { + +// Fatal internal map() error (potentially OOM related). 
+void NORETURN reportMapError(uptr SizeIfOOM) { + char Error[128] = "Scudo ERROR: internal map failure\n"; + if (SizeIfOOM) { + formatString( + Error, sizeof(Error), + "Scudo ERROR: internal map failure (NO MEMORY) requesting %zuKB\n", + SizeIfOOM >> 10); + } + reportRawError(Error); +} + +void NORETURN reportUnmapError(uptr Addr, uptr Size) { + char Error[128]; + formatString(Error, sizeof(Error), + "Scudo ERROR: internal unmap failure (error desc=%s) Addr 0x%zx " + "Size %zu\n", + strerror(errno), Addr, Size); + reportRawError(Error); +} + +void NORETURN reportProtectError(uptr Addr, uptr Size, int Prot) { + char Error[128]; + formatString( + Error, sizeof(Error), + "Scudo ERROR: internal protect failure (error desc=%s) Addr 0x%zx " + "Size %zu Prot %x\n", + strerror(errno), Addr, Size, Prot); + reportRawError(Error); +} + +} // namespace scudo + +#endif // SCUDO_LINUX || SCUDO_TRUSTY diff --git a/compiler-rt/lib/scudo/standalone/report_linux.h b/compiler-rt/lib/scudo/standalone/report_linux.h new file mode 100644 index 0000000000000..aa0bb247e6723 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/report_linux.h @@ -0,0 +1,34 @@ +//===-- report_linux.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_REPORT_LINUX_H_ +#define SCUDO_REPORT_LINUX_H_ + +#include "platform.h" + +#if SCUDO_LINUX || SCUDO_TRUSTY + +#include "internal_defs.h" + +namespace scudo { + +// Report a fatal error when a map call fails. SizeIfOOM shall +// hold the requested size on an out-of-memory error, 0 otherwise. +void NORETURN reportMapError(uptr SizeIfOOM = 0); + +// Report a fatal error when an unmap call fails. 
+void NORETURN reportUnmapError(uptr Addr, uptr Size); + +// Report a fatal error when a mprotect call fails. +void NORETURN reportProtectError(uptr Addr, uptr Size, int Prot); + +} // namespace scudo + +#endif // SCUDO_LINUX || SCUDO_TRUSTY + +#endif // SCUDO_REPORT_LINUX_H_ diff --git a/compiler-rt/lib/scudo/standalone/trusty.cpp b/compiler-rt/lib/scudo/standalone/trusty.cpp index 5f72b1cb3e54b..26bc8e551ce45 100644 --- a/compiler-rt/lib/scudo/standalone/trusty.cpp +++ b/compiler-rt/lib/scudo/standalone/trusty.cpp @@ -12,6 +12,7 @@ #include "common.h" #include "mutex.h" +#include "report_linux.h" #include "trusty.h" #include // for errno @@ -51,7 +52,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, if (IS_ERR(P)) { errno = lk_err_to_errno(PTR_ERR(P)); if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) - dieOnMapUnmapError(Size); + reportMapError(Size); return nullptr; } @@ -61,7 +62,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, void unmap(UNUSED void *Addr, UNUSED uptr Size, UNUSED uptr Flags, UNUSED MapPlatformData *Data) { if (_trusty_munmap(Addr, Size) != 0) - dieOnMapUnmapError(); + reportUnmapError(Addr, Size); } void setMemoryPermission(UNUSED uptr Addr, UNUSED uptr Size, UNUSED uptr Flags, From eb4a061568a392ec8c7274df7ece82bf670f81bb Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 13 Oct 2023 16:04:13 -0400 Subject: [PATCH 103/720] [gn] port 99d92d18e334 --- .../gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn index ed581d8f75998..c46e59bc247a2 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn @@ -51,6 +51,8 @@ source_set("sources") { "release.h", "report.cpp", "report.h", + "report_linux.cpp", + "report_linux.h", 
"secondary.h", "size_class_map.h", "stack_depot.h", From e220398cc3aea0c23752594aa3d8437c13bf4c71 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Fri, 13 Oct 2023 13:34:15 -0700 Subject: [PATCH 104/720] [MSVC, ARM64] Add __prefetch intrinsic (#67174) Implement __prefetch intrinsic. MSVC docs: https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-170 Bug: https://github.com/llvm/llvm-project/issues/65405 --- clang/include/clang/Basic/BuiltinsAArch64.def | 2 ++ clang/lib/CodeGen/CGBuiltin.cpp | 9 +++++++++ clang/lib/Headers/intrin.h | 2 ++ clang/test/CodeGen/arm64-microsoft-intrinsics.c | 10 ++++++++++ 4 files changed, 23 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 12c7a371e0fbd..82a1ba3c82ad3 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -283,6 +283,8 @@ TARGET_HEADER_BUILTIN(_CountLeadingZeros64, "UiULLi", "nh", INTRIN_H, ALL_MS_LAN TARGET_HEADER_BUILTIN(_CountOneBits, "UiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_CountOneBits64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__prefetch, "vv*", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef LANGBUILTIN #undef TARGET_BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 8cb7943df9a78..c05e69eff4370 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10842,6 +10842,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Result; } + if (BuiltinID == AArch64::BI__prefetch) { + Value *Address = EmitScalarExpr(E->getArg(0)); + Value *RW = llvm::ConstantInt::get(Int32Ty, 0); + Value *Locality = ConstantInt::get(Int32Ty, 3); + Value *Data = llvm::ConstantInt::get(Int32Ty, 1); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); + return Builder.CreateCall(F, {Address, RW, 
Locality, Data}); + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. if (std::optional MsvcIntId = diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index 4678c527bfaab..9ebaea9fee942 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -586,6 +586,8 @@ unsigned int _CountLeadingZeros(unsigned long); unsigned int _CountLeadingZeros64(unsigned _int64); unsigned int _CountOneBits(unsigned long); unsigned int _CountOneBits64(unsigned __int64); + +void __cdecl __prefetch(void *); #endif /*----------------------------------------------------------------------------*\ diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index b15defb0894e9..44b2ee28fe568 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -402,6 +402,16 @@ unsigned int check_CountOneBits64(unsigned __int64 arg1) { // CHECK-MSCOMPAT: ret i32 %[[VAR2]] // CHECK-LINUX: error: call to undeclared function '_CountOneBits64' +void check__prefetch(void *arg1) { + return __prefetch(arg1); +} + +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca ptr, align 8 +// CHECK-MSCOMPAT: store ptr %[[ARG1]], ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load ptr, ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: call void @llvm.prefetch.p0(ptr %[[VAR0]], i32 0, i32 3, i32 1) +// CHECK-MSCOMPAT: ret void + // CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"} // CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"} From d9ede91a27fe751fbe2afff14f450c11c24a3024 Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Fri, 13 Oct 2023 13:36:58 -0700 Subject: [PATCH 105/720] [scudo] Fix type mismatch in trusty. 
(#69024) --- compiler-rt/lib/scudo/standalone/trusty.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/scudo/standalone/trusty.cpp b/compiler-rt/lib/scudo/standalone/trusty.cpp index 26bc8e551ce45..26b349c6e506e 100644 --- a/compiler-rt/lib/scudo/standalone/trusty.cpp +++ b/compiler-rt/lib/scudo/standalone/trusty.cpp @@ -62,7 +62,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, void unmap(UNUSED void *Addr, UNUSED uptr Size, UNUSED uptr Flags, UNUSED MapPlatformData *Data) { if (_trusty_munmap(Addr, Size) != 0) - reportUnmapError(Addr, Size); + reportUnmapError(reinterpret_cast(Addr), Size); } void setMemoryPermission(UNUSED uptr Addr, UNUSED uptr Size, UNUSED uptr Flags, From 1673a1ba5decd907d49e64ef705980a145b891d1 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Fri, 13 Oct 2023 16:51:24 -0400 Subject: [PATCH 106/720] [LLDB][NFC] Create a namespace for the DWARF plugin (#68150) As a followup of https://github.com/llvm/llvm-project/pull/67851, I'm defining a new namespace `lldb_plugin::dwarf` for the classes in this Plugins/SymbolFile/DWARF folder. This change is very NFC and helped me with exporting the necessary symbols for my out-of-tree language plugin. The only class that I didn't change is ClangDWARFASTParser, because that shouldn't be in the same namespace as the generic language-agnostic dwarf parser. It would be a good idea if other plugins follow the same namespace scheme. 
--- .../include/lldb/Expression/DWARFExpression.h | 22 +- .../lldb/Expression/DWARFExpressionList.h | 15 +- lldb/include/lldb/Symbol/TypeSystem.h | 15 +- lldb/source/Expression/DWARFExpression.cpp | 1 + .../SymbolFile/DWARF/AppleDWARFIndex.cpp | 1 + .../SymbolFile/DWARF/AppleDWARFIndex.h | 6 +- .../Plugins/SymbolFile/DWARF/DIERef.cpp | 1 + lldb/source/Plugins/SymbolFile/DWARF/DIERef.h | 13 +- .../SymbolFile/DWARF/DWARFASTParser.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 40 +-- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 2 + .../SymbolFile/DWARF/DWARFASTParserClang.h | 209 ++++++----- .../SymbolFile/DWARF/DWARFAttribute.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFAttribute.h | 5 + .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp | 7 +- .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.h | 6 +- .../SymbolFile/DWARF/DWARFCompileUnit.cpp | 1 + .../SymbolFile/DWARF/DWARFCompileUnit.h | 8 +- .../Plugins/SymbolFile/DWARF/DWARFContext.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFContext.h | 6 +- .../Plugins/SymbolFile/DWARF/DWARFDIE.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFDIE.h | 17 +- .../SymbolFile/DWARF/DWARFDataExtractor.h | 2 +- .../SymbolFile/DWARF/DWARFDebugArangeSet.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugArangeSet.h | 6 +- .../SymbolFile/DWARF/DWARFDebugAranges.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugAranges.h | 11 +- .../SymbolFile/DWARF/DWARFDebugInfo.cpp | 4 +- .../Plugins/SymbolFile/DWARF/DWARFDebugInfo.h | 14 +- .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugInfoEntry.h | 24 +- .../SymbolFile/DWARF/DWARFDebugMacro.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugMacro.h | 27 +- .../SymbolFile/DWARF/DWARFDebugRanges.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugRanges.h | 8 +- .../SymbolFile/DWARF/DWARFDeclContext.cpp | 1 + .../SymbolFile/DWARF/DWARFDeclContext.h | 8 +- .../Plugins/SymbolFile/DWARF/DWARFDefines.cpp | 6 +- .../Plugins/SymbolFile/DWARF/DWARFDefines.h | 6 +- .../SymbolFile/DWARF/DWARFFormValue.cpp | 1 + 
.../Plugins/SymbolFile/DWARF/DWARFFormValue.h | 15 +- .../Plugins/SymbolFile/DWARF/DWARFIndex.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFIndex.h | 6 +- .../SymbolFile/DWARF/DWARFTypeUnit.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFTypeUnit.h | 8 +- .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 28 +- .../Plugins/SymbolFile/DWARF/DWARFUnit.h | 64 ++-- .../SymbolFile/DWARF/DebugNamesDWARFIndex.cpp | 3 +- .../SymbolFile/DWARF/DebugNamesDWARFIndex.h | 6 +- .../SymbolFile/DWARF/ManualDWARFIndex.cpp | 1 + .../SymbolFile/DWARF/ManualDWARFIndex.h | 6 +- .../Plugins/SymbolFile/DWARF/NameToDIE.cpp | 1 + .../Plugins/SymbolFile/DWARF/NameToDIE.h | 25 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 10 +- .../SymbolFile/DWARF/SymbolFileDWARF.h | 333 ++++++++---------- .../DWARF/SymbolFileDWARFDebugMap.cpp | 5 + .../DWARF/SymbolFileDWARFDebugMap.h | 229 ++++++------ .../SymbolFile/DWARF/SymbolFileDWARFDwo.cpp | 1 + .../SymbolFile/DWARF/SymbolFileDWARFDwo.h | 27 +- .../SymbolFile/DWARF/UniqueDWARFASTType.cpp | 1 + .../SymbolFile/DWARF/UniqueDWARFASTType.h | 18 +- .../TypeSystem/Clang/TypeSystemClang.cpp | 1 + .../TypeSystem/Clang/TypeSystemClang.h | 2 +- 63 files changed, 700 insertions(+), 594 deletions(-) diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h index 5e03f539a272c..1d85308d1caa7 100644 --- a/lldb/include/lldb/Expression/DWARFExpression.h +++ b/lldb/include/lldb/Expression/DWARFExpression.h @@ -18,10 +18,14 @@ #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include -class DWARFUnit; - namespace lldb_private { +namespace plugin { +namespace dwarf { +class DWARFUnit; +} // namespace dwarf +} // namespace plugin + /// \class DWARFExpression DWARFExpression.h /// "lldb/Expression/DWARFExpression.h" Encapsulates a DWARF location /// expression and interprets it. 
@@ -64,18 +68,20 @@ class DWARFExpression { /// \return /// The address specified by the operation, if the operation exists, or /// LLDB_INVALID_ADDRESS otherwise. - lldb::addr_t GetLocation_DW_OP_addr(const DWARFUnit *dwarf_cu, + lldb::addr_t GetLocation_DW_OP_addr(const plugin::dwarf::DWARFUnit *dwarf_cu, bool &error) const; - bool Update_DW_OP_addr(const DWARFUnit *dwarf_cu, lldb::addr_t file_addr); + bool Update_DW_OP_addr(const plugin::dwarf::DWARFUnit *dwarf_cu, + lldb::addr_t file_addr); void UpdateValue(uint64_t const_value, lldb::offset_t const_value_byte_size, uint8_t addr_byte_size); - bool ContainsThreadLocalStorage(const DWARFUnit *dwarf_cu) const; + bool + ContainsThreadLocalStorage(const plugin::dwarf::DWARFUnit *dwarf_cu) const; bool LinkThreadLocalStorage( - const DWARFUnit *dwarf_cu, + const plugin::dwarf::DWARFUnit *dwarf_cu, std::function const &link_address_callback); @@ -128,13 +134,13 @@ class DWARFExpression { /// details of the failure are provided through it. static bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx, lldb::ModuleSP module_sp, const DataExtractor &opcodes, - const DWARFUnit *dwarf_cu, + const plugin::dwarf::DWARFUnit *dwarf_cu, const lldb::RegisterKind reg_set, const Value *initial_value_ptr, const Value *object_address_ptr, Value &result, Status *error_ptr); - static bool ParseDWARFLocationList(const DWARFUnit *dwarf_cu, + static bool ParseDWARFLocationList(const plugin::dwarf::DWARFUnit *dwarf_cu, const DataExtractor &data, DWARFExpressionList *loc_list); diff --git a/lldb/include/lldb/Expression/DWARFExpressionList.h b/lldb/include/lldb/Expression/DWARFExpressionList.h index c0939647056dc..c2218ad4af0a7 100644 --- a/lldb/include/lldb/Expression/DWARFExpressionList.h +++ b/lldb/include/lldb/Expression/DWARFExpressionList.h @@ -13,10 +13,14 @@ #include "lldb/Utility/RangeMap.h" #include "lldb/lldb-private.h" -class DWARFUnit; - namespace lldb_private { +namespace plugin { +namespace dwarf { +class DWARFUnit; 
+} // namespace dwarf +} // namespace plugin + /// \class DWARFExpressionList DWARFExpressionList.h /// "lldb/Expression/DWARFExpressionList.h" Encapsulates a range map from file /// address range to a single DWARF location expression. @@ -24,13 +28,14 @@ class DWARFExpressionList { public: DWARFExpressionList() = default; - DWARFExpressionList(lldb::ModuleSP module_sp, const DWARFUnit *dwarf_cu, + DWARFExpressionList(lldb::ModuleSP module_sp, + const plugin::dwarf::DWARFUnit *dwarf_cu, lldb::addr_t func_file_addr) : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu), m_func_file_addr(func_file_addr) {} DWARFExpressionList(lldb::ModuleSP module_sp, DWARFExpression expr, - const DWARFUnit *dwarf_cu) + const plugin::dwarf::DWARFUnit *dwarf_cu) : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu) { AddExpression(0, LLDB_INVALID_ADDRESS, expr); } @@ -136,7 +141,7 @@ class DWARFExpressionList { /// The DWARF compile unit this expression belongs to. It is used to evaluate /// values indexing into the .debug_addr section (e.g. DW_OP_GNU_addr_index, /// DW_OP_GNU_const_index) - const DWARFUnit *m_dwarf_cu = nullptr; + const plugin::dwarf::DWARFUnit *m_dwarf_cu = nullptr; // Function base file address. lldb::addr_t m_func_file_addr = LLDB_INVALID_ADDRESS; diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index 56d09db837051..56acb1db1546a 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -28,11 +28,17 @@ #include "lldb/Symbol/CompilerDeclContext.h" #include "lldb/lldb-private.h" -class DWARFDIE; -class DWARFASTParser; class PDBASTParser; namespace lldb_private { + +namespace plugin { +namespace dwarf { +class DWARFDIE; +class DWARFASTParser; +} // namespace dwarf +} // namespace plugin + namespace npdb { class PdbAstBuilder; } // namespace npdb @@ -93,7 +99,8 @@ class TypeSystem : public PluginInterface, /// removing all the TypeSystems from the TypeSystemMap. 
virtual void Finalize() {} - virtual DWARFASTParser *GetDWARFParser() { return nullptr; } + virtual plugin::dwarf::DWARFASTParser *GetDWARFParser() { return nullptr; } + virtual PDBASTParser *GetPDBParser() { return nullptr; } virtual npdb::PdbAstBuilder *GetNativePDBParser() { return nullptr; } @@ -563,6 +570,6 @@ class TypeSystemMap { std::optional create_callback = std::nullopt); }; -} // namespace lldb_private + } // namespace lldb_private #endif // LLDB_SYMBOL_TYPESYSTEM_H diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index 93fcf0579be0b..fe4928d4f43a4 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -45,6 +45,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; // DWARFExpression constructor DWARFExpression::DWARFExpression() : m_data() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp index 34fb98b5a9b69..325517ca1d249 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; std::unique_ptr AppleDWARFIndex::Create( Module &module, DWARFDataExtractor apple_names, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h index 6b948e0798953..a1fb99700d10a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h @@ -12,7 +12,8 @@ #include "Plugins/SymbolFile/DWARF/DWARFIndex.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" -namespace lldb_private { +namespace lldb_private::plugin { +namespace dwarf { class 
AppleDWARFIndex : public DWARFIndex { public: static std::unique_ptr @@ -77,6 +78,7 @@ class AppleDWARFIndex : public DWARFIndex { std::optional search_for_tag = std::nullopt, std::optional search_for_qualhash = std::nullopt); }; -} // namespace lldb_private +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_APPLEDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp index 88a5e6027557b..163e9f4c081cf 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp @@ -14,6 +14,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; void llvm::format_provider::format(const DIERef &ref, raw_ostream &OS, StringRef Style) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h index b5a5cfe263f78..ad443aacb46ec 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h @@ -14,6 +14,8 @@ #include #include +namespace lldb_private::plugin { +namespace dwarf { /// Identifies a DWARF debug info entry within a given Module. It contains three /// "coordinates": /// - file_index: identifies the separate stand alone debug info file @@ -93,7 +95,7 @@ class DIERef { /// \return /// Returns a valid DIERef if decoding succeeded, std::nullopt if there was /// unsufficient or invalid values that were decoded. - static std::optional Decode(const lldb_private::DataExtractor &data, + static std::optional Decode(const DataExtractor &data, lldb::offset_t *offset_ptr); /// Encode this object into a data encoder object. @@ -103,7 +105,7 @@ class DIERef { /// \param encoder /// A data encoder object that serialized bytes will be encoded into. 
/// - void Encode(lldb_private::DataEncoder &encoder) const; + void Encode(DataEncoder &encoder) const; static constexpr uint64_t k_die_offset_bit_size = DW_DIE_OFFSET_MAX_BITSIZE; static constexpr uint64_t k_file_index_bit_size = @@ -131,10 +133,13 @@ class DIERef { static_assert(sizeof(DIERef) == 8); typedef std::vector DIEArray; +} // namespace dwarf +} // namespace lldb_private::plugin namespace llvm { -template<> struct format_provider { - static void format(const DIERef &ref, raw_ostream &OS, StringRef Style); +template <> struct format_provider { + static void format(const lldb_private::plugin::dwarf::DIERef &ref, + raw_ostream &OS, StringRef Style); }; } // namespace llvm diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp index a68b7cd110eb7..1fe0cadecc9e7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -18,6 +18,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; std::optional DWARFASTParser::ParseChildArrayInfo(const DWARFDIE &parent_die, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 18825ae060b12..eaafbe169cc8c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -17,53 +17,53 @@ #include "lldb/lldb-enumerations.h" #include -class DWARFDIE; namespace lldb_private { class CompileUnit; class ExecutionContext; } + +namespace lldb_private::plugin { +namespace dwarf { +class DWARFDIE; class SymbolFileDWARF; class DWARFASTParser { public: virtual ~DWARFASTParser() = default; - virtual lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, + virtual lldb::TypeSP ParseTypeFromDWARF(const SymbolContext &sc, const DWARFDIE &die, bool 
*type_is_new_ptr) = 0; - virtual lldb_private::ConstString - ConstructDemangledNameFromDWARF(const DWARFDIE &die) = 0; + virtual ConstString ConstructDemangledNameFromDWARF(const DWARFDIE &die) = 0; - virtual lldb_private::Function * - ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, - const DWARFDIE &die, - const lldb_private::AddressRange &range) = 0; + virtual Function *ParseFunctionFromDWARF(CompileUnit &comp_unit, + const DWARFDIE &die, + const AddressRange &range) = 0; - virtual bool - CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type, - lldb_private::CompilerType &compiler_type) = 0; + virtual bool CompleteTypeFromDWARF(const DWARFDIE &die, Type *type, + CompilerType &compiler_type) = 0; - virtual lldb_private::CompilerDecl - GetDeclForUIDFromDWARF(const DWARFDIE &die) = 0; + virtual CompilerDecl GetDeclForUIDFromDWARF(const DWARFDIE &die) = 0; - virtual lldb_private::CompilerDeclContext + virtual CompilerDeclContext GetDeclContextForUIDFromDWARF(const DWARFDIE &die) = 0; - virtual lldb_private::CompilerDeclContext + virtual CompilerDeclContext GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) = 0; virtual void EnsureAllDIEsInDeclContextHaveBeenParsed( - lldb_private::CompilerDeclContext decl_context) = 0; + CompilerDeclContext decl_context) = 0; - virtual lldb_private::ConstString - GetDIEClassTemplateParams(const DWARFDIE &die) = 0; + virtual ConstString GetDIEClassTemplateParams(const DWARFDIE &die) = 0; - static std::optional + static std::optional ParseChildArrayInfo(const DWARFDIE &parent_die, - const lldb_private::ExecutionContext *exe_ctx = nullptr); + const ExecutionContext *exe_ctx = nullptr); static lldb::AccessType GetAccessTypeFromDWARF(uint32_t dwarf_accessibility); }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index d0065896b0d22..545a5dcc7d0fd 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -60,6 +60,8 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; + DWARFASTParserClang::DWARFASTParserClang(TypeSystemClang &ast) : m_ast(ast), m_die_to_decl_ctx(), m_decl_ctx_to_die() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 88bfc490e8907..3d6912cf56c17 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -31,45 +31,51 @@ namespace lldb_private { class CompileUnit; } +namespace lldb_private::plugin { +namespace dwarf { class DWARFDebugInfoEntry; class SymbolFileDWARF; +} // namespace dwarf +} // namespace lldb_private::plugin struct ParsedDWARFTypeAttributes; -class DWARFASTParserClang : public DWARFASTParser { +class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { public: DWARFASTParserClang(lldb_private::TypeSystemClang &ast); ~DWARFASTParserClang() override; // DWARFASTParser interface. 
- lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, - bool *type_is_new_ptr) override; + lldb::TypeSP + ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, + const lldb_private::plugin::dwarf::DWARFDIE &die, + bool *type_is_new_ptr) override; - lldb_private::ConstString - ConstructDemangledNameFromDWARF(const DWARFDIE &die) override; + lldb_private::ConstString ConstructDemangledNameFromDWARF( + const lldb_private::plugin::dwarf::DWARFDIE &die) override; lldb_private::Function * ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, - const DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &die, const lldb_private::AddressRange &func_range) override; bool - CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type, + CompleteTypeFromDWARF(const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &compiler_type) override; - lldb_private::CompilerDecl - GetDeclForUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDecl GetDeclForUIDFromDWARF( + const lldb_private::plugin::dwarf::DWARFDIE &die) override; void EnsureAllDIEsInDeclContextHaveBeenParsed( lldb_private::CompilerDeclContext decl_context) override; - lldb_private::CompilerDeclContext - GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDeclContext GetDeclContextForUIDFromDWARF( + const lldb_private::plugin::dwarf::DWARFDIE &die) override; - lldb_private::CompilerDeclContext - GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDeclContext GetDeclContextContainingUIDFromDWARF( + const lldb_private::plugin::dwarf::DWARFDIE &die) override; lldb_private::ClangASTImporter &GetClangASTImporter(); @@ -85,9 +91,9 @@ class DWARFASTParserClang : public DWARFASTParser { /// DWARFFormValue with the bit width of the given integer type. 
/// Returns an error if the value in the DWARFFormValue does not fit /// into the given integer type or the integer type isn't supported. - llvm::Expected - ExtractIntFromFormValue(const lldb_private::CompilerType &int_type, - const DWARFFormValue &form_value) const; + llvm::Expected ExtractIntFromFormValue( + const lldb_private::CompilerType &int_type, + const lldb_private::plugin::dwarf::DWARFFormValue &form_value) const; /// Returns the template parameters of a class DWARFDIE as a string. /// @@ -99,8 +105,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \return A string, including surrounding '<>', of the template parameters. /// If the DIE's name already has '<>', returns an empty ConstString because /// it's assumed that the caller is using the DIE name anyway. - lldb_private::ConstString - GetDIEClassTemplateParams(const DWARFDIE &die) override; + lldb_private::ConstString GetDIEClassTemplateParams( + const lldb_private::plugin::dwarf::DWARFDIE &die) override; protected: /// Protected typedefs and members. 
@@ -108,14 +114,19 @@ class DWARFASTParserClang : public DWARFASTParser { class DelayedAddObjCClassProperty; typedef std::vector DelayedPropertyList; - typedef llvm::DenseMap + typedef llvm::DenseMap< + const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *, + clang::DeclContext *> DIEToDeclContextMap; - typedef std::multimap + typedef std::multimap DeclContextToDIEMap; - typedef llvm::DenseMap + typedef llvm::DenseMap< + const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *, + lldb_private::OptionalClangModuleID> DIEToModuleMap; - typedef llvm::DenseMap + typedef llvm::DenseMap< + const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *, clang::Decl *> DIEToDeclMap; lldb_private::TypeSystemClang &m_ast; @@ -126,11 +137,14 @@ class DWARFASTParserClang : public DWARFASTParser { std::unique_ptr m_clang_ast_importer_up; /// @} - clang::DeclContext *GetDeclContextForBlock(const DWARFDIE &die); + clang::DeclContext * + GetDeclContextForBlock(const lldb_private::plugin::dwarf::DWARFDIE &die); - clang::BlockDecl *ResolveBlockDIE(const DWARFDIE &die); + clang::BlockDecl * + ResolveBlockDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); - clang::NamespaceDecl *ResolveNamespaceDIE(const DWARFDIE &die); + clang::NamespaceDecl * + ResolveNamespaceDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); /// Returns the namespace decl that a DW_TAG_imported_declaration imports. /// @@ -141,82 +155,98 @@ class DWARFASTParserClang : public DWARFASTParser { /// 'die' imports. If the imported entity is not a namespace /// or another import declaration, returns nullptr. If an error /// occurs, returns nullptr. 
- clang::NamespaceDecl *ResolveImportedDeclarationDIE(const DWARFDIE &die); + clang::NamespaceDecl *ResolveImportedDeclarationDIE( + const lldb_private::plugin::dwarf::DWARFDIE &die); - bool ParseTemplateDIE(const DWARFDIE &die, + bool ParseTemplateDIE(const lldb_private::plugin::dwarf::DWARFDIE &die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); bool ParseTemplateParameterInfos( - const DWARFDIE &parent_die, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); - std::string GetCPlusPlusQualifiedName(const DWARFDIE &die); + std::string + GetCPlusPlusQualifiedName(const lldb_private::plugin::dwarf::DWARFDIE &die); bool ParseChildMembers( - const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type, + const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::CompilerType &class_compiler_type, std::vector> &base_classes, - std::vector &member_function_dies, + std::vector &member_function_dies, DelayedPropertyList &delayed_properties, const lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); size_t ParseChildParameters(clang::DeclContext *containing_decl_ctx, - const DWARFDIE &parent_die, bool skip_artificial, - bool &is_static, bool &is_variadic, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + bool skip_artificial, bool &is_static, bool &is_variadic, bool &has_template_params, std::vector &function_args, std::vector &function_param_decls, unsigned &type_quals); - size_t ParseChildEnumerators(lldb_private::CompilerType &compiler_type, - bool is_signed, uint32_t enumerator_byte_size, - const DWARFDIE &parent_die); + size_t ParseChildEnumerators( + lldb_private::CompilerType &compiler_type, bool is_signed, + uint32_t enumerator_byte_size, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die); /// Parse a structure, class, or union type DIE. 
- lldb::TypeSP ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, - ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP + ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, + const lldb_private::plugin::dwarf::DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); - lldb_private::Type *GetTypeForDIE(const DWARFDIE &die); + lldb_private::Type * + GetTypeForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); - clang::Decl *GetClangDeclForDIE(const DWARFDIE &die); + clang::Decl * + GetClangDeclForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); - clang::DeclContext *GetClangDeclContextForDIE(const DWARFDIE &die); + clang::DeclContext * + GetClangDeclContextForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); - clang::DeclContext *GetClangDeclContextContainingDIE(const DWARFDIE &die, - DWARFDIE *decl_ctx_die); - lldb_private::OptionalClangModuleID GetOwningClangModule(const DWARFDIE &die); + clang::DeclContext *GetClangDeclContextContainingDIE( + const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::plugin::dwarf::DWARFDIE *decl_ctx_die); + lldb_private::OptionalClangModuleID + GetOwningClangModule(const lldb_private::plugin::dwarf::DWARFDIE &die); - bool CopyUniqueClassMethodTypes(const DWARFDIE &src_class_die, - const DWARFDIE &dst_class_die, - lldb_private::Type *class_type, - std::vector &failures); + bool CopyUniqueClassMethodTypes( + const lldb_private::plugin::dwarf::DWARFDIE &src_class_die, + const lldb_private::plugin::dwarf::DWARFDIE &dst_class_die, + lldb_private::Type *class_type, + std::vector &failures); - clang::DeclContext *GetCachedClangDeclContextForDIE(const DWARFDIE &die); + clang::DeclContext *GetCachedClangDeclContextForDIE( + const lldb_private::plugin::dwarf::DWARFDIE &die); - void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, const DWARFDIE &die); + void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, + const lldb_private::plugin::dwarf::DWARFDIE &die); - void 
LinkDeclToDIE(clang::Decl *decl, const DWARFDIE &die); + void LinkDeclToDIE(clang::Decl *decl, + const lldb_private::plugin::dwarf::DWARFDIE &die); /// If \p type_sp is valid, calculate and set its symbol context scope, and /// update the type list for its backing symbol file. /// /// Returns \p type_sp. - lldb::TypeSP - UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, lldb::TypeSP type_sp); + lldb::TypeSP UpdateSymbolContextScopeForType( + const lldb_private::SymbolContext &sc, + const lldb_private::plugin::dwarf::DWARFDIE &die, lldb::TypeSP type_sp); /// Follow Clang Module Skeleton CU references to find a type definition. - lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, - lldb_private::Log *log); + lldb::TypeSP + ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, + const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::Log *log); // Return true if this type is a declaration to a type in an external // module. - lldb::ModuleSP GetModuleForType(const DWARFDIE &die); + lldb::ModuleSP + GetModuleForType(const lldb_private::plugin::dwarf::DWARFDIE &die); private: struct FieldInfo { @@ -268,33 +298,41 @@ class DWARFASTParserClang : public DWARFASTParser { /// created property. /// \param delayed_properties The list of delayed properties that the result /// will be appended to. 
- void ParseObjCProperty(const DWARFDIE &die, const DWARFDIE &parent_die, - const lldb_private::CompilerType &class_clang_type, - DelayedPropertyList &delayed_properties); + void + ParseObjCProperty(const lldb_private::plugin::dwarf::DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + const lldb_private::CompilerType &class_clang_type, + DelayedPropertyList &delayed_properties); void - ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die, + ParseSingleMember(const lldb_private::plugin::dwarf::DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info, FieldInfo &last_field_info); - bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type, + bool CompleteRecordType(const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &clang_type); - bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type, + bool CompleteEnumType(const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &clang_type); - lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, - ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP + ParseTypeModifier(const lldb_private::SymbolContext &sc, + const lldb_private::plugin::dwarf::DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseSubroutine(const DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseSubroutine(const lldb_private::plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseArrayType(const DWARFDIE &die, + lldb::TypeSP ParseArrayType(const 
lldb_private::plugin::dwarf::DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die, - const ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP + ParsePointerToMemberType(const lldb_private::plugin::dwarf::DWARFDIE &die, + const ParsedDWARFTypeAttributes &attrs); /// Parses a DW_TAG_inheritance DIE into a base/super class. /// @@ -311,7 +349,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \param layout_info The layout information that will be updated for C++ /// base classes with the base offset. void ParseInheritance( - const DWARFDIE &die, const DWARFDIE &parent_die, + const lldb_private::plugin::dwarf::DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType class_clang_type, const lldb::AccessType default_accessibility, const lldb::ModuleSP &module_sp, @@ -328,7 +367,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \param layout_info The layout information that will be updated for // base classes with the base offset void - ParseRustVariantPart(DWARFDIE &die, const DWARFDIE &parent_die, + ParseRustVariantPart(lldb_private::plugin::dwarf::DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, lldb_private::CompilerType &class_clang_type, const lldb::AccessType default_accesibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); @@ -338,7 +378,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// Some attributes are relevant for all kinds of types (declaration), while /// others are only meaningful to a specific type (is_virtual) struct ParsedDWARFTypeAttributes { - explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); + explicit ParsedDWARFTypeAttributes( + const lldb_private::plugin::dwarf::DWARFDIE &die); lldb::AccessType accessibility = lldb::eAccessNone; bool is_artificial = false; @@ -355,12 +396,12 @@ struct ParsedDWARFTypeAttributes { const char *mangled_name = nullptr; 
lldb_private::ConstString name; lldb_private::Declaration decl; - DWARFDIE object_pointer; - DWARFFormValue abstract_origin; - DWARFFormValue containing_type; - DWARFFormValue signature; - DWARFFormValue specification; - DWARFFormValue type; + lldb_private::plugin::dwarf::DWARFDIE object_pointer; + lldb_private::plugin::dwarf::DWARFFormValue abstract_origin; + lldb_private::plugin::dwarf::DWARFFormValue containing_type; + lldb_private::plugin::dwarf::DWARFFormValue signature; + lldb_private::plugin::dwarf::DWARFFormValue specification; + lldb_private::plugin::dwarf::DWARFFormValue type; lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; std::optional byte_size; size_t calling_convention = llvm::dwarf::DW_CC_normal; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp index 00b56537ae2b5..3d35775e081e3 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp @@ -11,6 +11,7 @@ #include "DWARFDebugInfo.h" using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; DWARFAttributes::DWARFAttributes() : m_infos() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h index 90e12fa024936..e05ccc980d92a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h @@ -14,6 +14,8 @@ #include "llvm/ADT/SmallVector.h" #include +namespace lldb_private::plugin { +namespace dwarf { class DWARFUnit; class DWARFAttribute { @@ -31,6 +33,7 @@ class DWARFAttribute { form = m_form; val = m_value; } + protected: dw_attr_t m_attr; dw_form_t m_form; @@ -72,5 +75,7 @@ class DWARFAttributes { typedef llvm::SmallVector collection; collection m_infos; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFATTRIBUTE_H 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp index 37a917c3a7661..3a3b05acd26d6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp @@ -18,6 +18,7 @@ #include using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; std::optional DWARFBaseDIE::GetDIERef() const { if (!IsValid()) @@ -35,7 +36,7 @@ dw_tag_t DWARFBaseDIE::Tag() const { } const char *DWARFBaseDIE::GetTagAsCString() const { - return lldb_private::DW_TAG_value_to_name(Tag()); + return DW_TAG_value_to_name(Tag()); } const char *DWARFBaseDIE::GetAttributeValueAsString(const dw_attr_t attr, @@ -120,6 +121,8 @@ DWARFAttributes DWARFBaseDIE::GetAttributes(Recurse recurse) const { return DWARFAttributes(); } +namespace lldb_private::plugin { +namespace dwarf { bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { return lhs.GetDIE() == rhs.GetDIE() && lhs.GetCU() == rhs.GetCU(); } @@ -127,6 +130,8 @@ bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { bool operator!=(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { return !(lhs == rhs); } +} // namespace dwarf +} // namespace lldb_private::plugin const DWARFDataExtractor &DWARFBaseDIE::GetData() const { // Clients must check if this DIE is valid before calling this function. diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h index 8bcf807ad163a..75c822703cd80 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h @@ -15,6 +15,8 @@ #include "llvm/Support/Error.h" #include +namespace lldb_private::plugin { +namespace dwarf { class DIERef; class DWARFASTParser; class DWARFAttributes; @@ -78,7 +80,7 @@ class DWARFBaseDIE { // correct section data. // // Clients must validate that this object is valid before calling this. 
- const lldb_private::DWARFDataExtractor &GetData() const; + const DWARFDataExtractor &GetData() const; // Accessing information about a DIE dw_tag_t Tag() const; @@ -124,5 +126,7 @@ class DWARFBaseDIE { bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs); bool operator!=(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs); +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFBASEDIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp index f839a59bf6c39..ec4c297cf7e16 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp @@ -16,6 +16,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; void DWARFCompileUnit::Dump(Stream *s) const { s->Format( diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h index 65debac4c7d92..dd130977d4b1f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h @@ -14,13 +14,15 @@ namespace llvm { class DWARFAbbreviationDeclarationSet; -} +} // namespace llvm +namespace lldb_private::plugin { +namespace dwarf { class DWARFCompileUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override; - void Dump(lldb_private::Stream *s) const override; + void Dump(Stream *s) const override; static bool classof(const DWARFUnit *unit) { return !unit->IsTypeUnit(); } @@ -40,5 +42,7 @@ class DWARFCompileUnit : public DWARFUnit { friend class DWARFUnit; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFCOMPILEUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp index f72dad88e1575..ee347036dbbc0 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; static DWARFDataExtractor LoadSection(SectionList *section_list, SectionType section_type) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h index 7df776b5f5141..87c6eb209337c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h @@ -16,7 +16,8 @@ #include #include -namespace lldb_private { +namespace lldb_private::plugin { +namespace dwarf { class DWARFContext { private: SectionList *m_main_section_list; @@ -78,6 +79,7 @@ class DWARFContext { llvm::DWARFContext &GetAsLLVM(); }; -} // namespace lldb_private +} // namespace dwarf +} // namespace lldb_private::plugin #endif diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp index b31c5dcac9185..d43c2ac276fb8 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; namespace { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h index 031ea26ad4050..25b313bf09957 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h @@ -13,6 +13,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/iterator_range.h" +namespace lldb_private::plugin { +namespace dwarf { class DWARFDIE : public DWARFBaseDIE { public: class child_iterator; @@ -31,14 +33,14 @@ class DWARFDIE : public DWARFBaseDIE { const 
char *GetPubname() const; using DWARFBaseDIE::GetName; - void GetName(lldb_private::Stream &s) const; + void GetName(Stream &s) const; - void AppendTypeName(lldb_private::Stream &s) const; + void AppendTypeName(Stream &s) const; - lldb_private::Type *ResolveType() const; + Type *ResolveType() const; // Resolve a type by UID using this DIE's DWARF file - lldb_private::Type *ResolveTypeUID(const DWARFDIE &die) const; + Type *ResolveTypeUID(const DWARFDIE &die) const; // Functions for obtaining DIE relations and references @@ -72,8 +74,7 @@ class DWARFDIE : public DWARFBaseDIE { /// Return this DIE's decl context as it is needed to look up types /// in Clang's -gmodules debug info format. - void GetDeclContext( - llvm::SmallVectorImpl &context) const; + void GetDeclContext(llvm::SmallVectorImpl &context) const; // Getting attribute values from the DIE. // @@ -88,7 +89,7 @@ class DWARFDIE : public DWARFBaseDIE { std::optional &decl_file, std::optional &decl_line, std::optional &decl_column, std::optional &call_file, std::optional &call_line, std::optional &call_column, - lldb_private::DWARFExpressionList *frame_base) const; + DWARFExpressionList *frame_base) const; /// The range of all the children of this DIE. 
llvm::iterator_range children() const; @@ -126,5 +127,7 @@ class DWARFDIE::child_iterator return *this; } }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h index b9526b079c1e9..41b8e9ad0217b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h @@ -33,6 +33,6 @@ class DWARFDataExtractor : public DataExtractor { llvm::DWARFDataExtractor GetAsLLVMDWARF() const; llvm::DataExtractor GetAsLLVM() const; }; -} +} // namespace lldb_private #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDATAEXTRACTOR_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp index 03cbfd28ae741..8461b94abca63 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp @@ -13,6 +13,7 @@ #include using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; DWARFDebugArangeSet::DWARFDebugArangeSet() : m_offset(DW_INVALID_OFFSET), m_next_offset(DW_INVALID_OFFSET) {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h index 3c8633eaa3cce..ecdbe953f58b0 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h @@ -13,6 +13,8 @@ #include #include +namespace lldb_private::plugin { +namespace dwarf { class DWARFDebugArangeSet { public: struct Header { @@ -42,7 +44,7 @@ class DWARFDebugArangeSet { DWARFDebugArangeSet(); void Clear(); void SetOffset(uint32_t offset) { m_offset = offset; } - llvm::Error extract(const lldb_private::DWARFDataExtractor &data, + llvm::Error extract(const 
DWARFDataExtractor &data, lldb::offset_t *offset_ptr); dw_offset_t FindAddress(dw_addr_t address) const; size_t NumDescriptors() const { return m_arange_descriptors.size(); } @@ -62,5 +64,7 @@ class DWARFDebugArangeSet { Header m_header; DescriptorColl m_arange_descriptors; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp index b38dd2b88c9d0..da73891f66654 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp @@ -15,6 +15,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; // Constructor DWARFDebugAranges::DWARFDebugAranges() : m_aranges() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h index 5ff37e400c884..99e2108b85c67 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h @@ -13,10 +13,11 @@ #include "lldb/Utility/RangeMap.h" #include "llvm/Support/Error.h" +namespace lldb_private::plugin { +namespace dwarf { class DWARFDebugAranges { protected: - typedef lldb_private::RangeDataVector - RangeToDIE; + typedef RangeDataVector RangeToDIE; public: typedef RangeToDIE::Entry Range; @@ -26,14 +27,14 @@ class DWARFDebugAranges { void Clear() { m_aranges.Clear(); } - void extract(const lldb_private::DWARFDataExtractor &debug_aranges_data); + void extract(const DWARFDataExtractor &debug_aranges_data); // Use append range multiple times and then call sort void AppendRange(dw_offset_t cu_offset, dw_addr_t low_pc, dw_addr_t high_pc); void Sort(bool minimize); - void Dump(lldb_private::Log *log) const; + void Dump(Log *log) const; dw_offset_t FindAddress(dw_addr_t address) 
const; @@ -50,5 +51,7 @@ class DWARFDebugAranges { protected: RangeToDIE m_aranges; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp index 9a33d6338b87d..553b6a4c551d2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp @@ -27,10 +27,10 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; // Constructor -DWARFDebugInfo::DWARFDebugInfo(SymbolFileDWARF &dwarf, - lldb_private::DWARFContext &context) +DWARFDebugInfo::DWARFDebugInfo(SymbolFileDWARF &dwarf, DWARFContext &context) : m_dwarf(dwarf), m_context(context), m_units(), m_cu_aranges_up() {} const DWARFDebugAranges &DWARFDebugInfo::GetCompileUnitAranges() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h index c990ac9fbe583..d5e48f312ea0e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h @@ -19,20 +19,18 @@ #include "lldb/lldb-private.h" #include "llvm/Support/Error.h" -namespace lldb_private { +namespace lldb_private::plugin { +namespace dwarf { class DWARFContext; -} class DWARFDebugInfo { public: - typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, - DWARFUnit *cu, + typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, DWARFUnit *cu, DWARFDebugInfoEntry *die, const dw_offset_t next_offset, const uint32_t depth, void *userData); - explicit DWARFDebugInfo(SymbolFileDWARF &dwarf, - lldb_private::DWARFContext &context); + explicit DWARFDebugInfo(SymbolFileDWARF &dwarf, DWARFContext &context); size_t GetNumUnits(); DWARFUnit *GetUnitAtIndex(size_t idx); @@ -58,7 +56,7 @@ class DWARFDebugInfo { typedef std::vector 
UnitColl; SymbolFileDWARF &m_dwarf; - lldb_private::DWARFContext &m_context; + DWARFContext &m_context; llvm::once_flag m_units_once_flag; UnitColl m_units; @@ -80,5 +78,7 @@ class DWARFDebugInfo { DWARFDebugInfo(const DWARFDebugInfo &) = delete; const DWARFDebugInfo &operator=(const DWARFDebugInfo &) = delete; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGINFO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index a6ab83700904c..a18836e5d9bbb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -35,6 +35,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; extern int g_verbose; // Extract a debug info entry for a given DWARFUnit from the data diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h index 29db44a16bb12..c19fa74285490 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h @@ -22,6 +22,8 @@ #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +namespace lldb_private::plugin { +namespace dwarf { class DWARFDeclContext; #define DIE_SIBLING_IDX_BITSIZE 31 @@ -47,8 +49,8 @@ class DWARFDebugInfoEntry { void BuildFunctionAddressRangeTable(DWARFUnit *cu, DWARFDebugAranges *debug_aranges) const; - bool Extract(const lldb_private::DWARFDataExtractor &data, - const DWARFUnit *cu, lldb::offset_t *offset_ptr); + bool Extract(const DWARFDataExtractor &data, const DWARFUnit *cu, + lldb::offset_t *offset_ptr); using Recurse = DWARFBaseDIE::Recurse; DWARFAttributes GetAttributes(DWARFUnit *cu, @@ -104,13 +106,15 @@ class DWARFDebugInfoEntry { const char *GetPubname(const DWARFUnit *cu) 
const; - bool GetDIENamesAndRanges( - DWARFUnit *cu, const char *&name, const char *&mangled, - DWARFRangeList &rangeList, std::optional &decl_file, - std::optional &decl_line, std::optional &decl_column, - std::optional &call_file, std::optional &call_line, - std::optional &call_column, - lldb_private::DWARFExpressionList *frame_base = nullptr) const; + bool GetDIENamesAndRanges(DWARFUnit *cu, const char *&name, + const char *&mangled, DWARFRangeList &rangeList, + std::optional &decl_file, + std::optional &decl_line, + std::optional &decl_column, + std::optional &call_file, + std::optional &call_line, + std::optional &call_column, + DWARFExpressionList *frame_base = nullptr) const; const llvm::DWARFAbbreviationDeclaration * GetAbbreviationDeclarationPtr(const DWARFUnit *cu) const; @@ -190,5 +194,7 @@ class DWARFDebugInfoEntry { void GetAttributes(DWARFUnit *cu, DWARFAttributes &attrs, Recurse recurse, uint32_t curr_depth) const; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGINFOENTRY_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp index 19c6448c4e74a..2cd84bc55b751 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp @@ -15,6 +15,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; DWARFDebugMacroHeader DWARFDebugMacroHeader::ParseHeader(const DWARFDataExtractor &debug_macro_data, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h index cbf762458331b..67d1cde8d5de0 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h @@ -17,11 +17,11 @@ #include "lldb/lldb-types.h" namespace lldb_private { - class DWARFDataExtractor; +} 
-} // namespace lldb_private - +namespace lldb_private::plugin { +namespace dwarf { class SymbolFileDWARF; class DWARFDebugMacroHeader { @@ -33,15 +33,14 @@ class DWARFDebugMacroHeader { }; static DWARFDebugMacroHeader - ParseHeader(const lldb_private::DWARFDataExtractor &debug_macro_data, + ParseHeader(const DWARFDataExtractor &debug_macro_data, lldb::offset_t *offset); bool OffsetIs64Bit() const { return m_offset_is_64_bit; } private: - static void - SkipOperandTable(const lldb_private::DWARFDataExtractor &debug_macro_data, - lldb::offset_t *offset); + static void SkipOperandTable(const DWARFDataExtractor &debug_macro_data, + lldb::offset_t *offset); uint16_t m_version = 0; bool m_offset_is_64_bit = false; @@ -50,12 +49,14 @@ class DWARFDebugMacroHeader { class DWARFDebugMacroEntry { public: - static void - ReadMacroEntries(const lldb_private::DWARFDataExtractor &debug_macro_data, - const lldb_private::DWARFDataExtractor &debug_str_data, - const bool offset_is_64_bit, lldb::offset_t *sect_offset, - SymbolFileDWARF *sym_file_dwarf, - lldb_private::DebugMacrosSP &debug_macros_sp); + static void ReadMacroEntries(const DWARFDataExtractor &debug_macro_data, + const DWARFDataExtractor &debug_str_data, + const bool offset_is_64_bit, + lldb::offset_t *sect_offset, + SymbolFileDWARF *sym_file_dwarf, + DebugMacrosSP &debug_macros_sp); }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGMACRO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp index 0b5bb23a4981f..fd8f4e12ff770 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp @@ -11,6 +11,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; DWARFDebugRanges::DWARFDebugRanges() : m_range_map() {} diff --git 
a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h index 2e06cd5daf6f3..a04fcf59d5bfd 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h @@ -12,21 +12,23 @@ #include "lldb/Core/dwarf.h" #include +namespace lldb_private::plugin { +namespace dwarf { class DWARFUnit; -namespace lldb_private { class DWARFContext; -} class DWARFDebugRanges { public: DWARFDebugRanges(); - void Extract(lldb_private::DWARFContext &context); + void Extract(DWARFContext &context); DWARFRangeList FindRanges(const DWARFUnit *cu, dw_offset_t debug_ranges_offset) const; protected: std::map m_range_map; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGRANGES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp index 393de0038e651..44e7602279013 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp @@ -9,6 +9,7 @@ #include "DWARFDeclContext.h" using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; const char *DWARFDeclContext::GetQualifiedName() const { if (m_qualified_name.empty()) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h index 13e3dfb70c0cc..a20a862d34029 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h @@ -16,6 +16,8 @@ #include #include +namespace lldb_private::plugin { +namespace dwarf { // DWARFDeclContext // // A class that represents a declaration context all the way down to a @@ -68,8 +70,8 @@ class DWARFDeclContext { // Same as GetQualifiedName, but the life time of the returned string will // be that of the LLDB session. 
- lldb_private::ConstString GetQualifiedNameAsConstString() const { - return lldb_private::ConstString(GetQualifiedName()); + ConstString GetQualifiedNameAsConstString() const { + return ConstString(GetQualifiedName()); } void Clear() { @@ -82,5 +84,7 @@ class DWARFDeclContext { collection m_entries; mutable std::string m_qualified_name; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDECLCONTEXT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp index 4e99a295ce50f..9a88aed85e979 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp @@ -12,7 +12,8 @@ #include #include -namespace lldb_private { +namespace lldb_private::plugin { +namespace dwarf { const char *DW_TAG_value_to_name(uint32_t val) { static char invalid[100]; @@ -88,4 +89,5 @@ const char *DW_LNS_value_to_name(uint32_t val) { return llvmstr.data(); } -} // namespace lldb_private +} // namespace dwarf +} // namespace lldb_private::plugin diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h index 2afdbb47381a9..3ed92cc203bf8 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h @@ -12,7 +12,8 @@ #include "lldb/Core/dwarf.h" #include -namespace lldb_private { +namespace lldb_private::plugin { +namespace dwarf { typedef uint32_t DRC_class; // Holds DRC_* class bitfields @@ -30,6 +31,7 @@ const char *DW_LANG_value_to_name(uint32_t val); const char *DW_LNS_value_to_name(uint32_t val); -} // namespace lldb_private +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEFINES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp index 
6ca17dcf47ff7..0a7029a55c047 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp @@ -22,6 +22,7 @@ class DWARFUnit; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; void DWARFFormValue::Clear() { m_unit = nullptr; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h index 2a8843c1a0d45..445749a6aac3a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h @@ -13,6 +13,8 @@ #include #include +namespace lldb_private::plugin { +namespace dwarf { class DWARFUnit; class SymbolFileDWARF; class DWARFDIE; @@ -51,9 +53,8 @@ class DWARFFormValue { ValueType &ValueRef() { return m_value; } void SetValue(const ValueType &val) { m_value = val; } - void Dump(lldb_private::Stream &s) const; - bool ExtractValue(const lldb_private::DWARFDataExtractor &data, - lldb::offset_t *offset_ptr); + void Dump(Stream &s) const; + bool ExtractValue(const DWARFDataExtractor &data, lldb::offset_t *offset_ptr); const uint8_t *BlockData() const; static std::optional GetFixedSize(dw_form_t form, const DWARFUnit *u); @@ -68,10 +69,10 @@ class DWARFFormValue { const char *AsCString() const; dw_addr_t Address() const; bool IsValid() const { return m_form != 0; } - bool SkipValue(const lldb_private::DWARFDataExtractor &debug_info_data, + bool SkipValue(const DWARFDataExtractor &debug_info_data, lldb::offset_t *offset_ptr) const; static bool SkipValue(const dw_form_t form, - const lldb_private::DWARFDataExtractor &debug_info_data, + const DWARFDataExtractor &debug_info_data, lldb::offset_t *offset_ptr, const DWARFUnit *unit); static bool IsBlockForm(const dw_form_t form); static bool IsDataForm(const dw_form_t form); @@ -84,7 +85,9 @@ class DWARFFormValue { // It may be different from compile unit where m_value refers to. 
const DWARFUnit *m_unit = nullptr; // Unit for this form dw_form_t m_form = dw_form_t(0); // Form for this value - ValueType m_value; // Contains all data for the form + ValueType m_value; // Contains all data for the form }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFFORMVALUE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index 779b52481b856..b1c323b101cef 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -17,6 +17,7 @@ using namespace lldb_private; using namespace lldb; +using namespace lldb_private::plugin::dwarf; DWARFIndex::~DWARFIndex() = default; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h index 13fe96dae2aa1..9aadeddbb2175 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h @@ -17,10 +17,11 @@ #include "lldb/Core/Module.h" #include "lldb/Target/Statistics.h" +namespace lldb_private::plugin { +namespace dwarf { class DWARFDeclContext; class DWARFDIE; -namespace lldb_private { class DWARFIndex { public: DWARFIndex(Module &module) : m_module(module) {} @@ -102,6 +103,7 @@ class DWARFIndex { void ReportInvalidDIERef(DIERef ref, llvm::StringRef name) const; }; -} // namespace lldb_private +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp index 87af7177ca95e..4f3a3f5446537 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; void 
DWARFTypeUnit::Dump(Stream *s) const { s->Format("{0:x16}: Type Unit: length = {1:x8}, version = {2:x4}, " diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h index 5d939582a312e..7b58c632c6c5b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h @@ -14,13 +14,15 @@ namespace llvm { class DWARFAbbreviationDeclarationSet; -} +} // namespace llvm +namespace lldb_private::plugin { +namespace dwarf { class DWARFTypeUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override {} - void Dump(lldb_private::Stream *s) const override; + void Dump(Stream *s) const override; uint64_t GetTypeHash() { return m_header.GetTypeHash(); } @@ -37,5 +39,7 @@ class DWARFTypeUnit : public DWARFUnit { friend class DWARFUnit; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFTYPEUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index a09c68087c476..6f771c66a725c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -28,6 +28,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; extern int g_verbose; @@ -201,8 +202,8 @@ DWARFUnit::ScopedExtractDIEs::ScopedExtractDIEs(ScopedExtractDIEs &&rhs) rhs.m_cu = nullptr; } -DWARFUnit::ScopedExtractDIEs &DWARFUnit::ScopedExtractDIEs::operator=( - DWARFUnit::ScopedExtractDIEs &&rhs) { +DWARFUnit::ScopedExtractDIEs & +DWARFUnit::ScopedExtractDIEs::operator=(DWARFUnit::ScopedExtractDIEs &&rhs) { m_cu = rhs.m_cu; rhs.m_cu = nullptr; m_clear_dies = rhs.m_clear_dies; @@ -311,9 +312,9 @@ void DWARFUnit::ExtractDIEsRWLocked() { } if (!m_die_array.empty()) { - // The last die cannot have 
children (if it did, it wouldn't be the last one). - // This only makes a difference for malformed dwarf that does not have a - // terminating null die. + // The last die cannot have children (if it did, it wouldn't be the last + // one). This only makes a difference for malformed dwarf that does not have + // a terminating null die. m_die_array.back().SetHasChildren(false); if (m_first_die) { @@ -720,7 +721,7 @@ void DWARFUnit::ParseProducerInfo() { llvm::SmallVector matches; if (g_swiftlang_version_regex.Execute(producer, &matches)) { - m_producer_version.tryParse(matches[1]); + m_producer_version.tryParse(matches[1]); m_producer = eProducerSwift; } else if (producer.contains("clang")) { if (g_clang_version_regex.Execute(producer, &matches)) @@ -905,9 +906,10 @@ llvm::Error DWARFUnitHeader::ApplyIndexEntry( return llvm::Error::success(); } -llvm::Expected DWARFUnitHeader::extract( - const DWARFDataExtractor &data, DIERef::Section section, - lldb_private::DWARFContext &context, lldb::offset_t *offset_ptr) { +llvm::Expected +DWARFUnitHeader::extract(const DWARFDataExtractor &data, + DIERef::Section section, DWARFContext &context, + lldb::offset_t *offset_ptr) { DWARFUnitHeader header; header.m_offset = *offset_ptr; header.m_length = data.GetDWARFInitialLength(offset_ptr); @@ -1086,22 +1088,20 @@ DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) { return ranges; } -llvm::Expected -DWARFUnit::FindRnglistFromIndex(uint32_t index) { +llvm::Expected DWARFUnit::FindRnglistFromIndex(uint32_t index) { llvm::Expected maybe_offset = GetRnglistOffset(index); if (!maybe_offset) return maybe_offset.takeError(); return FindRnglistFromOffset(*maybe_offset); } - bool DWARFUnit::HasAny(llvm::ArrayRef tags) { ExtractUnitDIEIfNeeded(); if (m_dwo) return m_dwo->HasAny(tags); - for (const auto &die: m_die_array) { - for (const auto tag: tags) { + for (const auto &die : m_die_array) { + for (const auto tag : tags) { if (tag == die.Tag()) return true; } diff --git 
a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 20871d805e77a..3aef03712d00d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -19,6 +19,8 @@ #include #include +namespace lldb_private::plugin { +namespace dwarf { class DWARFUnit; class DWARFCompileUnit; class NameToDIE; @@ -78,21 +80,21 @@ class DWARFUnitHeader { llvm::Error ApplyIndexEntry(const llvm::DWARFUnitIndex::Entry *index_entry); - static llvm::Expected - extract(const lldb_private::DWARFDataExtractor &data, DIERef::Section section, - lldb_private::DWARFContext &dwarf_context, - lldb::offset_t *offset_ptr); + static llvm::Expected extract(const DWARFDataExtractor &data, + DIERef::Section section, + DWARFContext &dwarf_context, + lldb::offset_t *offset_ptr); }; -class DWARFUnit : public lldb_private::UserID { +class DWARFUnit : public UserID { using die_iterator_range = llvm::iterator_range; public: static llvm::Expected extract(SymbolFileDWARF &dwarf2Data, lldb::user_id_t uid, - const lldb_private::DWARFDataExtractor &debug_info, - DIERef::Section section, lldb::offset_t *offset_ptr); + const DWARFDataExtractor &debug_info, DIERef::Section section, + lldb::offset_t *offset_ptr); virtual ~DWARFUnit(); bool IsDWOUnit() { return m_is_dwo; } @@ -104,6 +106,7 @@ class DWARFUnit : public lldb_private::UserID { class ScopedExtractDIEs { DWARFUnit *m_cu; + public: bool m_clear_dies = false; ScopedExtractDIEs(DWARFUnit &cu); @@ -115,8 +118,8 @@ class DWARFUnit : public lldb_private::UserID { }; ScopedExtractDIEs ExtractDIEsScoped(); - bool Verify(lldb_private::Stream *s) const; - virtual void Dump(lldb_private::Stream *s) const = 0; + bool Verify(Stream *s) const; + virtual void Dump(Stream *s) const = 0; /// Get the data that contains the DIE information for this unit. 
/// /// This will return the correct bytes that contain the data for @@ -125,7 +128,7 @@ class DWARFUnit : public lldb_private::UserID { /// /// \return /// The correct data for the DIE information in this unit. - const lldb_private::DWARFDataExtractor &GetData() const; + const DWARFDataExtractor &GetData() const; /// Get the size in bytes of the unit header. /// @@ -210,10 +213,10 @@ class DWARFUnit : public lldb_private::UserID { bool GetIsOptimized(); - const lldb_private::FileSpec &GetCompilationDirectory(); - const lldb_private::FileSpec &GetAbsolutePath(); - lldb_private::FileSpec GetFile(size_t file_idx); - lldb_private::FileSpec::Style GetPathStyle(); + const FileSpec &GetCompilationDirectory(); + const FileSpec &GetAbsolutePath(); + FileSpec GetFile(size_t file_idx); + FileSpec::Style GetPathStyle(); SymbolFileDWARFDwo *GetDwoSymbolFile(); @@ -227,7 +230,9 @@ class DWARFUnit : public lldb_private::UserID { uint8_t GetUnitType() const { return m_header.GetUnitType(); } bool IsTypeUnit() const { return m_header.IsTypeUnit(); } /// Note that this check only works for DWARF5+. - bool IsSkeletonUnit() const { return GetUnitType() == llvm::dwarf::DW_UT_skeleton; } + bool IsSkeletonUnit() const { + return GetUnitType() == llvm::dwarf::DW_UT_skeleton; + } std::optional GetStringOffsetSectionItem(uint32_t index) const; @@ -259,9 +264,9 @@ class DWARFUnit : public lldb_private::UserID { /// Return the location table for parsing the given location list data. The /// format is chosen according to the unit type. Never returns null. std::unique_ptr - GetLocationTable(const lldb_private::DataExtractor &data) const; + GetLocationTable(const DataExtractor &data) const; - lldb_private::DWARFDataExtractor GetLocationData() const; + DWARFDataExtractor GetLocationData() const; /// Returns true if any DIEs in the unit match any DW_TAG values in \a tags. 
/// @@ -272,7 +277,6 @@ class DWARFUnit : public lldb_private::UserID { /// True if any DIEs match any tag in \a tags, false otherwise. bool HasAny(llvm::ArrayRef tags); - /// Get the fission .dwo file specific error for this compile unit. /// /// The skeleton compile unit only can have a DWO error. Any other type @@ -281,7 +285,7 @@ class DWARFUnit : public lldb_private::UserID { /// \returns /// A valid DWO error if there is a problem with anything in the /// locating or parsing inforamtion in the .dwo file - const lldb_private::Status &GetDwoError() const { return m_dwo_error; } + const Status &GetDwoError() const { return m_dwo_error; } /// Set the fission .dwo file specific error for this compile unit. /// @@ -289,7 +293,7 @@ class DWARFUnit : public lldb_private::UserID { /// .dwo file. Things like a missing .dwo file, DWO ID mismatch, and other /// .dwo errors can be stored in each compile unit so the issues can be /// communicated to the user. - void SetDwoError(const lldb_private::Status &error) { m_dwo_error = error; } + void SetDwoError(const Status &error) { m_dwo_error = error; } protected: DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid, @@ -298,7 +302,7 @@ class DWARFUnit : public lldb_private::UserID { DIERef::Section section, bool is_dwo); llvm::Error ExtractHeader(SymbolFileDWARF &dwarf, - const lldb_private::DWARFDataExtractor &data, + const DWARFDataExtractor &data, lldb::offset_t *offset_ptr); // Get the DWARF unit DWARF debug information entry. 
Parse the single DIE @@ -321,7 +325,7 @@ class DWARFUnit : public lldb_private::UserID { const std::optional &GetRnglistTable(); - lldb_private::DWARFDataExtractor GetRnglistData() const; + DWARFDataExtractor GetRnglistData() const; SymbolFileDWARF &m_dwarf; std::shared_ptr m_dwo; @@ -348,12 +352,12 @@ class DWARFUnit : public lldb_private::UserID { DWARFProducer m_producer = eProducerInvalid; llvm::VersionTuple m_producer_version; std::optional m_language_type; - lldb_private::LazyBool m_is_optimized = lldb_private::eLazyBoolCalculate; - std::optional m_comp_dir; - std::optional m_file_spec; - std::optional m_addr_base; ///< Value of DW_AT_addr_base. - dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base. - dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base. + LazyBool m_is_optimized = eLazyBoolCalculate; + std::optional m_comp_dir; + std::optional m_file_spec; + std::optional m_addr_base; ///< Value of DW_AT_addr_base. + dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base. + dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base. std::optional m_gnu_addr_base; std::optional m_gnu_ranges_base; @@ -374,7 +378,7 @@ class DWARFUnit : public lldb_private::UserID { /// If we get an error when trying to load a .dwo file, save that error here. /// Errors include .dwo/.dwp file not found, or the .dwp/.dwp file was found /// but DWO ID doesn't match, etc. 
- lldb_private::Status m_dwo_error; + Status m_dwo_error; private: void ParseProducerInfo(); @@ -390,5 +394,7 @@ class DWARFUnit : public lldb_private::UserID { DWARFUnit(const DWARFUnit &) = delete; const DWARFUnit &operator=(const DWARFUnit &) = delete; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index af2d6c554140b..292ea2806c59d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; llvm::Expected> DebugNamesDWARFIndex::Create(Module &module, DWARFDataExtractor debug_names, @@ -227,7 +228,7 @@ void DebugNamesDWARFIndex::GetNamespaces( ConstString name, llvm::function_ref callback) { for (const DebugNames::Entry &entry : m_debug_names_up->equal_range(name.GetStringRef())) { - dwarf::Tag entry_tag = entry.tag(); + lldb_private::dwarf::Tag entry_tag = entry.tag(); if (entry_tag == DW_TAG_namespace || entry_tag == DW_TAG_imported_declaration) { if (!ProcessEntry(entry, callback)) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h index abbd700f1603f..7ce630a56137d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h @@ -17,7 +17,8 @@ #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include -namespace lldb_private { +namespace lldb_private::plugin { +namespace dwarf { class DebugNamesDWARFIndex : public DWARFIndex { public: static llvm::Expected> @@ -89,6 +90,7 @@ class DebugNamesDWARFIndex : public DWARFIndex { static llvm::DenseSet 
GetUnits(const DebugNames &debug_names); }; -} // namespace lldb_private +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DEBUGNAMESDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index 90f18c96afa23..16ff5f7d4842c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -28,6 +28,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; void ManualDWARFIndex::Index() { if (m_indexed) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h index d95cf501face8..0126e587e52d8 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h @@ -13,10 +13,11 @@ #include "Plugins/SymbolFile/DWARF/NameToDIE.h" #include "llvm/ADT/DenseSet.h" +namespace lldb_private::plugin { +namespace dwarf { class DWARFDebugInfo; class SymbolFileDWARFDwo; -namespace lldb_private { class ManualDWARFIndex : public DWARFIndex { public: ManualDWARFIndex(Module &module, SymbolFileDWARF &dwarf, @@ -173,6 +174,7 @@ class ManualDWARFIndex : public DWARFIndex { IndexSet m_set; bool m_indexed = false; }; -} // namespace lldb_private +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_MANUALDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp index 89e628f5eaf1c..44d90648700cf 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp @@ -20,6 +20,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; void 
NameToDIE::Finalize() { m_map.Sort(std::less()); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h index 61df1a628ab59..90eac1fa37338 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h @@ -16,6 +16,8 @@ #include "lldb/Core/dwarf.h" #include "lldb/lldb-defines.h" +namespace lldb_private::plugin { +namespace dwarf { class DWARFUnit; class NameToDIE { @@ -24,18 +26,18 @@ class NameToDIE { ~NameToDIE() = default; - void Dump(lldb_private::Stream *s); + void Dump(Stream *s); - void Insert(lldb_private::ConstString name, const DIERef &die_ref); + void Insert(ConstString name, const DIERef &die_ref); void Append(const NameToDIE &other); void Finalize(); - bool Find(lldb_private::ConstString name, + bool Find(ConstString name, llvm::function_ref callback) const; - bool Find(const lldb_private::RegularExpression ®ex, + bool Find(const RegularExpression ®ex, llvm::function_ref callback) const; /// \a unit must be the skeleton unit if possible, not GetNonSkeletonUnit(). @@ -44,8 +46,7 @@ class NameToDIE { llvm::function_ref callback) const; void - ForEach(std::function const + ForEach(std::function const &callback) const; /// Decode a serialized version of this object from data. @@ -61,9 +62,8 @@ class NameToDIE { /// All strings in cache files are put into string tables for efficiency /// and cache file size reduction. Strings are stored as uint32_t string /// table offsets in the cache data. - bool Decode(const lldb_private::DataExtractor &data, - lldb::offset_t *offset_ptr, - const lldb_private::StringTableReader &strtab); + bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, + const StringTableReader &strtab); /// Encode this object into a data encoder object. /// @@ -76,8 +76,7 @@ class NameToDIE { /// All strings in cache files are put into string tables for efficiency /// and cache file size reduction. 
Strings are stored as uint32_t string /// table offsets in the cache data. - void Encode(lldb_private::DataEncoder &encoder, - lldb_private::ConstStringTable &strtab) const; + void Encode(DataEncoder &encoder, ConstStringTable &strtab) const; /// Used for unit testing the encoding and decoding. bool operator==(const NameToDIE &rhs) const; @@ -87,7 +86,9 @@ class NameToDIE { void Clear() { m_map.Clear(); } protected: - lldb_private::UniqueCStringMap m_map; + UniqueCStringMap m_map; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_NAMETODIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index f52a095bf1675..737c65d0712e0 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -99,6 +99,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; LLDB_PLUGIN_DEFINE(SymbolFileDWARF) @@ -138,9 +139,8 @@ static PluginProperties &GetGlobalPluginProperties() { } static const llvm::DWARFDebugLine::LineTable * -ParseLLVMLineTable(lldb_private::DWARFContext &context, - llvm::DWARFDebugLine &line, dw_offset_t line_offset, - dw_offset_t unit_offset) { +ParseLLVMLineTable(DWARFContext &context, llvm::DWARFDebugLine &line, + dw_offset_t line_offset, dw_offset_t unit_offset) { Log *log = GetLog(DWARFLog::DebugInfo); llvm::DWARFDataExtractor data = context.getOrLoadLineData().GetAsLLVMDWARF(); @@ -161,7 +161,7 @@ ParseLLVMLineTable(lldb_private::DWARFContext &context, return *line_table; } -static bool ParseLLVMLineTablePrologue(lldb_private::DWARFContext &context, +static bool ParseLLVMLineTablePrologue(DWARFContext &context, llvm::DWARFDebugLine::Prologue &prologue, dw_offset_t line_offset, dw_offset_t unit_offset) { @@ -2429,7 +2429,7 @@ bool SymbolFileDWARF::DIEInDeclContext(const 
CompilerDeclContext &decl_ctx, // ...But if we are only checking root decl contexts, confirm that the // 'die' is a top-level context. if (only_root_namespaces) - return die.GetParent().Tag() == dwarf::DW_TAG_compile_unit; + return die.GetParent().Tag() == llvm::dwarf::DW_TAG_compile_unit; return true; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index a32c0609d3fdb..8ba7cd34f43e0 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -39,6 +39,14 @@ #include "DWARFIndex.h" #include "UniqueDWARFASTType.h" +class DWARFASTParserClang; + +namespace llvm { +class DWARFDebugAbbrev; +} // namespace llvm + +namespace lldb_private::plugin { +namespace dwarf { // Forward Declarations for this DWARF plugin class DebugMapModule; class DWARFCompileUnit; @@ -53,15 +61,10 @@ class DWARFTypeUnit; class SymbolFileDWARFDebugMap; class SymbolFileDWARFDwo; class SymbolFileDWARFDwp; -class UserID; - -namespace llvm { -class DWARFDebugAbbrev; -} -#define DIE_IS_BEING_PARSED ((lldb_private::Type *)1) +#define DIE_IS_BEING_PARSED ((Type *)1) -class SymbolFileDWARF : public lldb_private::SymbolFileCommon { +class SymbolFileDWARF : public SymbolFileCommon { /// LLVM RTTI support. 
static char ID; @@ -79,26 +82,24 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { friend class DebugMapModule; friend class DWARFCompileUnit; friend class DWARFDIE; - friend class DWARFASTParserClang; + friend class ::DWARFASTParserClang; // Static Functions static void Initialize(); static void Terminate(); - static void DebuggerInitialize(lldb_private::Debugger &debugger); + static void DebuggerInitialize(Debugger &debugger); static llvm::StringRef GetPluginNameStatic() { return "dwarf"; } static llvm::StringRef GetPluginDescriptionStatic(); - static lldb_private::SymbolFile * - CreateInstance(lldb::ObjectFileSP objfile_sp); + static SymbolFile *CreateInstance(lldb::ObjectFileSP objfile_sp); // Constructors and Destructors - SymbolFileDWARF(lldb::ObjectFileSP objfile_sp, - lldb_private::SectionList *dwo_section_list); + SymbolFileDWARF(lldb::ObjectFileSP objfile_sp, SectionList *dwo_section_list); ~SymbolFileDWARF() override; @@ -108,118 +109,99 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { // Compile Unit function calls - lldb::LanguageType - ParseLanguage(lldb_private::CompileUnit &comp_unit) override; + lldb::LanguageType ParseLanguage(CompileUnit &comp_unit) override; - lldb_private::XcodeSDK - ParseXcodeSDK(lldb_private::CompileUnit &comp_unit) override; + XcodeSDK ParseXcodeSDK(CompileUnit &comp_unit) override; - size_t ParseFunctions(lldb_private::CompileUnit &comp_unit) override; + size_t ParseFunctions(CompileUnit &comp_unit) override; - bool ParseLineTable(lldb_private::CompileUnit &comp_unit) override; + bool ParseLineTable(CompileUnit &comp_unit) override; - bool ParseDebugMacros(lldb_private::CompileUnit &comp_unit) override; + bool ParseDebugMacros(CompileUnit &comp_unit) override; - bool ForEachExternalModule( - lldb_private::CompileUnit &, llvm::DenseSet &, - llvm::function_ref) override; + bool ForEachExternalModule(CompileUnit &, llvm::DenseSet &, + llvm::function_ref) override; - bool 
ParseSupportFiles(lldb_private::CompileUnit &comp_unit, - lldb_private::FileSpecList &support_files) override; + bool ParseSupportFiles(CompileUnit &comp_unit, + FileSpecList &support_files) override; - bool ParseIsOptimized(lldb_private::CompileUnit &comp_unit) override; + bool ParseIsOptimized(CompileUnit &comp_unit) override; - size_t ParseTypes(lldb_private::CompileUnit &comp_unit) override; + size_t ParseTypes(CompileUnit &comp_unit) override; - bool ParseImportedModules( - const lldb_private::SymbolContext &sc, - std::vector &imported_modules) override; + bool + ParseImportedModules(const SymbolContext &sc, + std::vector &imported_modules) override; - size_t ParseBlocksRecursive(lldb_private::Function &func) override; + size_t ParseBlocksRecursive(Function &func) override; - size_t - ParseVariablesForContext(const lldb_private::SymbolContext &sc) override; + size_t ParseVariablesForContext(const SymbolContext &sc) override; - lldb_private::Type *ResolveTypeUID(lldb::user_id_t type_uid) override; - std::optional GetDynamicArrayInfoForUID( - lldb::user_id_t type_uid, - const lldb_private::ExecutionContext *exe_ctx) override; + Type *ResolveTypeUID(lldb::user_id_t type_uid) override; + std::optional + GetDynamicArrayInfoForUID(lldb::user_id_t type_uid, + const ExecutionContext *exe_ctx) override; - bool CompleteType(lldb_private::CompilerType &compiler_type) override; + bool CompleteType(CompilerType &compiler_type) override; - lldb_private::Type *ResolveType(const DWARFDIE &die, - bool assert_not_being_parsed = true, - bool resolve_function_context = false); + Type *ResolveType(const DWARFDIE &die, bool assert_not_being_parsed = true, + bool resolve_function_context = false); - lldb_private::CompilerDecl GetDeclForUID(lldb::user_id_t uid) override; + CompilerDecl GetDeclForUID(lldb::user_id_t uid) override; - lldb_private::CompilerDeclContext - GetDeclContextForUID(lldb::user_id_t uid) override; + CompilerDeclContext GetDeclContextForUID(lldb::user_id_t uid) 
override; - lldb_private::CompilerDeclContext - GetDeclContextContainingUID(lldb::user_id_t uid) override; + CompilerDeclContext GetDeclContextContainingUID(lldb::user_id_t uid) override; - void - ParseDeclsForContext(lldb_private::CompilerDeclContext decl_ctx) override; + void ParseDeclsForContext(CompilerDeclContext decl_ctx) override; - uint32_t ResolveSymbolContext(const lldb_private::Address &so_addr, + uint32_t ResolveSymbolContext(const Address &so_addr, lldb::SymbolContextItem resolve_scope, - lldb_private::SymbolContext &sc) override; - - lldb_private::Status - CalculateFrameVariableError(lldb_private::StackFrame &frame) override; + SymbolContext &sc) override; - uint32_t ResolveSymbolContext( - const lldb_private::SourceLocationSpec &src_location_spec, - lldb::SymbolContextItem resolve_scope, - lldb_private::SymbolContextList &sc_list) override; + Status CalculateFrameVariableError(StackFrame &frame) override; - void - FindGlobalVariables(lldb_private::ConstString name, - const lldb_private::CompilerDeclContext &parent_decl_ctx, - uint32_t max_matches, - lldb_private::VariableList &variables) override; + uint32_t ResolveSymbolContext(const SourceLocationSpec &src_location_spec, + lldb::SymbolContextItem resolve_scope, + SymbolContextList &sc_list) override; - void FindGlobalVariables(const lldb_private::RegularExpression ®ex, + void FindGlobalVariables(ConstString name, + const CompilerDeclContext &parent_decl_ctx, uint32_t max_matches, - lldb_private::VariableList &variables) override; + VariableList &variables) override; - void FindFunctions(const lldb_private::Module::LookupInfo &lookup_info, - const lldb_private::CompilerDeclContext &parent_decl_ctx, - bool include_inlines, - lldb_private::SymbolContextList &sc_list) override; + void FindGlobalVariables(const RegularExpression ®ex, uint32_t max_matches, + VariableList &variables) override; - void FindFunctions(const lldb_private::RegularExpression ®ex, - bool include_inlines, - 
lldb_private::SymbolContextList &sc_list) override; + void FindFunctions(const Module::LookupInfo &lookup_info, + const CompilerDeclContext &parent_decl_ctx, + bool include_inlines, SymbolContextList &sc_list) override; - void GetMangledNamesForFunction( - const std::string &scope_qualified_name, - std::vector &mangled_names) override; + void FindFunctions(const RegularExpression ®ex, bool include_inlines, + SymbolContextList &sc_list) override; void - FindTypes(lldb_private::ConstString name, - const lldb_private::CompilerDeclContext &parent_decl_ctx, - uint32_t max_matches, - llvm::DenseSet &searched_symbol_files, - lldb_private::TypeMap &types) override; - - void FindTypes(llvm::ArrayRef pattern, - lldb_private::LanguageSet languages, + GetMangledNamesForFunction(const std::string &scope_qualified_name, + std::vector &mangled_names) override; + + void FindTypes(ConstString name, const CompilerDeclContext &parent_decl_ctx, + uint32_t max_matches, llvm::DenseSet &searched_symbol_files, - lldb_private::TypeMap &types) override; + TypeMap &types) override; - void GetTypes(lldb_private::SymbolContextScope *sc_scope, - lldb::TypeClass type_mask, - lldb_private::TypeList &type_list) override; + void FindTypes(llvm::ArrayRef pattern, LanguageSet languages, + llvm::DenseSet &searched_symbol_files, + TypeMap &types) override; + + void GetTypes(SymbolContextScope *sc_scope, lldb::TypeClass type_mask, + TypeList &type_list) override; llvm::Expected GetTypeSystemForLanguage(lldb::LanguageType language) override; - lldb_private::CompilerDeclContext - FindNamespace(lldb_private::ConstString name, - const lldb_private::CompilerDeclContext &parent_decl_ctx, - bool only_root_namespaces) override; + CompilerDeclContext FindNamespace(ConstString name, + const CompilerDeclContext &parent_decl_ctx, + bool only_root_namespaces) override; void PreloadSymbols() override; @@ -239,25 +221,22 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { DWARFDIE 
GetDeclContextDIEContainingDIE(const DWARFDIE &die); - bool - HasForwardDeclForClangType(const lldb_private::CompilerType &compiler_type); + bool HasForwardDeclForClangType(const CompilerType &compiler_type); - lldb_private::CompileUnit * - GetCompUnitForDWARFCompUnit(DWARFCompileUnit &dwarf_cu); + CompileUnit *GetCompUnitForDWARFCompUnit(DWARFCompileUnit &dwarf_cu); - virtual void GetObjCMethods(lldb_private::ConstString class_name, + virtual void GetObjCMethods(ConstString class_name, llvm::function_ref callback); bool Supports_DW_AT_APPLE_objc_complete_type(DWARFUnit *cu); - lldb_private::DebugMacrosSP ParseDebugMacros(lldb::offset_t *offset); + DebugMacrosSP ParseDebugMacros(lldb::offset_t *offset); static DWARFDIE GetParentSymbolContextDIE(const DWARFDIE &die); - lldb::ModuleSP GetExternalModule(lldb_private::ConstString name); + lldb::ModuleSP GetExternalModule(ConstString name); - typedef std::map - ExternalTypeModuleMap; + typedef std::map ExternalTypeModuleMap; /// Return the list of Clang modules imported by this SymbolFile. const ExternalTypeModuleMap &getExternalTypeModules() const { @@ -275,26 +254,25 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { /// If this is a DWARF object with a single CU, return its DW_AT_dwo_id. std::optional GetDWOId(); - static bool - DIEInDeclContext(const lldb_private::CompilerDeclContext &parent_decl_ctx, - const DWARFDIE &die, bool only_root_namespaces = false); + static bool DIEInDeclContext(const CompilerDeclContext &parent_decl_ctx, + const DWARFDIE &die, + bool only_root_namespaces = false); - std::vector> - ParseCallEdgesInFunction(lldb_private::UserID func_id) override; + std::vector> + ParseCallEdgesInFunction(UserID func_id) override; - void Dump(lldb_private::Stream &s) override; + void Dump(Stream &s) override; - void DumpClangAST(lldb_private::Stream &s) override; + void DumpClangAST(Stream &s) override; /// List separate dwo files. 
- bool - GetSeparateDebugInfo(lldb_private::StructuredData::Dictionary &d) override; + bool GetSeparateDebugInfo(StructuredData::Dictionary &d) override; - lldb_private::DWARFContext &GetDWARFContext() { return m_context; } + DWARFContext &GetDWARFContext() { return m_context; } const std::shared_ptr &GetDwpSymbolFile(); - lldb_private::FileSpec GetFile(DWARFUnit &unit, size_t file_idx); + FileSpec GetFile(DWARFUnit &unit, size_t file_idx); static llvm::Expected GetTypeSystem(DWARFUnit &unit); @@ -302,12 +280,11 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { // CompilerDecl related functions - static lldb_private::CompilerDecl GetDecl(const DWARFDIE &die); + static CompilerDecl GetDecl(const DWARFDIE &die); - static lldb_private::CompilerDeclContext GetDeclContext(const DWARFDIE &die); + static CompilerDeclContext GetDeclContext(const DWARFDIE &die); - static lldb_private::CompilerDeclContext - GetContainingDeclContext(const DWARFDIE &die); + static CompilerDeclContext GetContainingDeclContext(const DWARFDIE &die); static DWARFDeclContext GetDWARFDeclContext(const DWARFDIE &die); @@ -317,39 +294,34 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { /// Same as GetLanguage() but reports all C++ versions as C++ (no version). 
static lldb::LanguageType GetLanguageFamily(DWARFUnit &unit); - lldb_private::StatsDuration::Duration GetDebugInfoParseTime() override { + StatsDuration::Duration GetDebugInfoParseTime() override { return m_parse_time; } - lldb_private::StatsDuration::Duration GetDebugInfoIndexTime() override; + StatsDuration::Duration GetDebugInfoIndexTime() override; - lldb_private::StatsDuration &GetDebugInfoParseTimeRef() { - return m_parse_time; - } + StatsDuration &GetDebugInfoParseTimeRef() { return m_parse_time; } virtual lldb::offset_t - GetVendorDWARFOpcodeSize(const lldb_private::DataExtractor &data, + GetVendorDWARFOpcodeSize(const DataExtractor &data, const lldb::offset_t data_offset, const uint8_t op) const { return LLDB_INVALID_OFFSET; } - virtual bool - ParseVendorDWARFOpcode(uint8_t op, const lldb_private::DataExtractor &opcodes, - lldb::offset_t &offset, - std::vector &stack) const { + virtual bool ParseVendorDWARFOpcode(uint8_t op, const DataExtractor &opcodes, + lldb::offset_t &offset, + std::vector &stack) const { return false; } - lldb_private::ConstString ConstructFunctionDemangledName(const DWARFDIE &die); + ConstString ConstructFunctionDemangledName(const DWARFDIE &die); std::optional GetFileIndex() const { return m_file_index; } void SetFileIndex(std::optional file_index) { m_file_index = file_index; } -protected: - typedef llvm::DenseMap - DIEToTypePtr; + typedef llvm::DenseMap DIEToTypePtr; typedef llvm::DenseMap DIEToVariableSP; typedef llvm::DenseMap variable_dies, lldb::addr_t func_low_pc); @@ -434,25 +401,22 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { // Given a die_offset, figure out the symbol context representing that die. bool ResolveFunction(const DWARFDIE &die, bool include_inlines, - lldb_private::SymbolContextList &sc_list); + SymbolContextList &sc_list); /// Resolve functions and (possibly) blocks for the given file address and a /// compile unit. The compile unit comes from the sc argument and it must be /// set. 
The results of the lookup (if any) are written back to the symbol /// context. void ResolveFunctionAndBlock(lldb::addr_t file_vm_addr, bool lookup_block, - lldb_private::SymbolContext &sc); + SymbolContext &sc); virtual lldb::TypeSP FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die); - virtual lldb::TypeSP - FindCompleteObjCDefinitionTypeForDIE(const DWARFDIE &die, - lldb_private::ConstString type_name, - bool must_be_implementation); + virtual lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( + const DWARFDIE &die, ConstString type_name, bool must_be_implementation); - lldb_private::Symbol * - GetObjCClassSymbol(lldb_private::ConstString objc_class_name); + Symbol *GetObjCClassSymbol(ConstString objc_class_name); lldb::TypeSP GetTypeForDIE(const DWARFDIE &die, bool resolve_function_context = false); @@ -475,12 +439,11 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { bool DIEDeclContextsMatch(const DWARFDIE &die1, const DWARFDIE &die2); - bool ClassContainsSelector(const DWARFDIE &class_die, - lldb_private::ConstString selector); + bool ClassContainsSelector(const DWARFDIE &class_die, ConstString selector); /// Parse call site entries (DW_TAG_call_site), including any nested call site /// parameters (DW_TAG_call_site_parameter). - std::vector> + std::vector> CollectCallEdges(lldb::ModuleSP module, DWARFDIE function_die); /// If this symbol file is linked to by a debug map (see @@ -490,16 +453,15 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { /// needed, on success and LLDB_INVALID_ADDRESS otherwise. 
lldb::addr_t FixupAddress(lldb::addr_t file_addr); - bool FixupAddress(lldb_private::Address &addr); + bool FixupAddress(Address &addr); - typedef llvm::SetVector TypeSet; + typedef llvm::SetVector TypeSet; void GetTypes(const DWARFDIE &die, dw_offset_t min_die_offset, dw_offset_t max_die_offset, uint32_t type_mask, TypeSet &type_set); - typedef lldb_private::RangeDataVector + typedef RangeDataVector GlobalVariableMap; GlobalVariableMap &GetGlobalAranges(); @@ -523,15 +485,14 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { void FindDwpSymbolFile(); - const lldb_private::FileSpecList &GetTypeUnitSupportFiles(DWARFTypeUnit &tu); + const FileSpecList &GetTypeUnitSupportFiles(DWARFTypeUnit &tu); - void InitializeFirstCodeAddressRecursive( - const lldb_private::SectionList §ion_list); + void InitializeFirstCodeAddressRecursive(const SectionList §ion_list); void InitializeFirstCodeAddress(); - void GetCompileOptions( - std::unordered_map &args) override; + void + GetCompileOptions(std::unordered_map &args) override; lldb::ModuleWP m_debug_map_module_wp; SymbolFileDWARFDebugMap *m_debug_map_symfile; @@ -539,7 +500,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { llvm::once_flag m_dwp_symfile_once_flag; std::shared_ptr m_dwp_symfile; - lldb_private::DWARFContext m_context; + DWARFContext m_context; llvm::once_flag m_info_once_flag; std::unique_ptr m_info; @@ -547,14 +508,13 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { std::unique_ptr m_abbr; std::unique_ptr m_global_aranges_up; - typedef std::unordered_map - DebugMacrosMap; + typedef std::unordered_map DebugMacrosMap; DebugMacrosMap m_debug_macros_map; ExternalTypeModuleMap m_external_type_modules; - std::unique_ptr m_index; + std::unique_ptr m_index; bool m_fetched_external_modules : 1; - lldb_private::LazyBool m_supports_DW_AT_APPLE_objc_complete_type; + LazyBool m_supports_DW_AT_APPLE_objc_complete_type; typedef std::set DIERefSet; typedef llvm::StringMap 
NameToOffsetMap; @@ -565,8 +525,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { DIEToVariableSP m_die_to_variable_sp; DIEToClangType m_forward_decl_die_to_clang_type; ClangTypeToDIE m_forward_decl_clang_type_to_die; - llvm::DenseMap - m_type_unit_support_files; + llvm::DenseMap m_type_unit_support_files; std::vector m_lldb_cu_to_dwarf_unit; /// DWARF does not provide a good way for traditional (concatenating) linkers /// to invalidate debug info describing dead-stripped code. These linkers will @@ -575,7 +534,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { /// Try to filter out this debug info by comparing it to the lowest code /// address in the module. lldb::addr_t m_first_code_address = LLDB_INVALID_ADDRESS; - lldb_private::StatsDuration m_parse_time; + StatsDuration m_parse_time; std::atomic_flag m_dwo_warning_issued = ATOMIC_FLAG_INIT; /// If this DWARF file a .DWO file or a DWARF .o file on mac when /// no dSYM file is being used, this file index will be set to a @@ -583,5 +542,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { /// an index that identifies the .DWO or .o file. 
std::optional m_file_index; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARF_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index 4e194939814b6..f789cbac9a717 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -43,6 +43,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; char SymbolFileDWARFDebugMap::ID; @@ -167,6 +168,8 @@ SymbolFileDWARFDebugMap::CompileUnitInfo::GetFileRangeMap( return file_range_map; } +namespace lldb_private::plugin { +namespace dwarf { class DebugMapModule : public Module { public: DebugMapModule(const ModuleSP &exe_module_sp, uint32_t cu_idx, @@ -223,6 +226,8 @@ class DebugMapModule : public Module { ModuleWP m_exe_module_wp; const uint32_t m_cu_idx; }; +} // namespace dwarf +} // namespace lldb_private::plugin void SymbolFileDWARFDebugMap::Initialize() { PluginManager::RegisterPlugin(GetPluginNameStatic(), diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index 0dc4235cf090f..52fa1dca3da5f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -21,12 +21,16 @@ #include "UniqueDWARFASTType.h" #include "lldb/Utility/StructuredData.h" +class DWARFASTParserClang; + +namespace lldb_private::plugin { +namespace dwarf { class SymbolFileDWARF; class DWARFCompileUnit; class DWARFDebugAranges; class DWARFDeclContext; -class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { +class SymbolFileDWARFDebugMap : public SymbolFileCommon { /// LLVM RTTI support. 
static char ID; @@ -48,8 +52,7 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { static llvm::StringRef GetPluginDescriptionStatic(); - static lldb_private::SymbolFile * - CreateInstance(lldb::ObjectFileSP objfile_sp); + static SymbolFile *CreateInstance(lldb::ObjectFileSP objfile_sp); // Constructors and Destructors SymbolFileDWARFDebugMap(lldb::ObjectFileSP objfile_sp); @@ -59,114 +62,94 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { void InitializeObject() override; // Compile Unit function calls - lldb::LanguageType - ParseLanguage(lldb_private::CompileUnit &comp_unit) override; - lldb_private::XcodeSDK - ParseXcodeSDK(lldb_private::CompileUnit &comp_unit) override; + lldb::LanguageType ParseLanguage(CompileUnit &comp_unit) override; + XcodeSDK ParseXcodeSDK(CompileUnit &comp_unit) override; llvm::SmallSet - ParseAllLanguages(lldb_private::CompileUnit &comp_unit) override; - size_t ParseFunctions(lldb_private::CompileUnit &comp_unit) override; - bool ParseLineTable(lldb_private::CompileUnit &comp_unit) override; - bool ParseDebugMacros(lldb_private::CompileUnit &comp_unit) override; - - bool ForEachExternalModule( - lldb_private::CompileUnit &, llvm::DenseSet &, - llvm::function_ref) override; - - bool ParseSupportFiles(lldb_private::CompileUnit &comp_unit, - lldb_private::FileSpecList &support_files) override; - - bool ParseIsOptimized(lldb_private::CompileUnit &comp_unit) override; - - size_t ParseTypes(lldb_private::CompileUnit &comp_unit) override; - - bool ParseImportedModules( - const lldb_private::SymbolContext &sc, - std::vector &imported_modules) override; - size_t ParseBlocksRecursive(lldb_private::Function &func) override; - size_t - ParseVariablesForContext(const lldb_private::SymbolContext &sc) override; - - lldb_private::Type *ResolveTypeUID(lldb::user_id_t type_uid) override; - std::optional GetDynamicArrayInfoForUID( - lldb::user_id_t type_uid, - const lldb_private::ExecutionContext *exe_ctx) 
override; - - lldb_private::CompilerDeclContext - GetDeclContextForUID(lldb::user_id_t uid) override; - lldb_private::CompilerDeclContext - GetDeclContextContainingUID(lldb::user_id_t uid) override; - void - ParseDeclsForContext(lldb_private::CompilerDeclContext decl_ctx) override; + ParseAllLanguages(CompileUnit &comp_unit) override; + size_t ParseFunctions(CompileUnit &comp_unit) override; + bool ParseLineTable(CompileUnit &comp_unit) override; + bool ParseDebugMacros(CompileUnit &comp_unit) override; + + bool ForEachExternalModule(CompileUnit &, llvm::DenseSet &, + llvm::function_ref) override; + + bool ParseSupportFiles(CompileUnit &comp_unit, + FileSpecList &support_files) override; - bool CompleteType(lldb_private::CompilerType &compiler_type) override; - uint32_t ResolveSymbolContext(const lldb_private::Address &so_addr, + bool ParseIsOptimized(CompileUnit &comp_unit) override; + + size_t ParseTypes(CompileUnit &comp_unit) override; + + bool + ParseImportedModules(const SymbolContext &sc, + std::vector &imported_modules) override; + size_t ParseBlocksRecursive(Function &func) override; + size_t ParseVariablesForContext(const SymbolContext &sc) override; + + Type *ResolveTypeUID(lldb::user_id_t type_uid) override; + std::optional + GetDynamicArrayInfoForUID(lldb::user_id_t type_uid, + const ExecutionContext *exe_ctx) override; + + CompilerDeclContext GetDeclContextForUID(lldb::user_id_t uid) override; + CompilerDeclContext GetDeclContextContainingUID(lldb::user_id_t uid) override; + void ParseDeclsForContext(CompilerDeclContext decl_ctx) override; + + bool CompleteType(CompilerType &compiler_type) override; + uint32_t ResolveSymbolContext(const Address &so_addr, lldb::SymbolContextItem resolve_scope, - lldb_private::SymbolContext &sc) override; - uint32_t ResolveSymbolContext( - const lldb_private::SourceLocationSpec &src_location_spec, - lldb::SymbolContextItem resolve_scope, - lldb_private::SymbolContextList &sc_list) override; + SymbolContext &sc) 
override; + uint32_t ResolveSymbolContext(const SourceLocationSpec &src_location_spec, + lldb::SymbolContextItem resolve_scope, + SymbolContextList &sc_list) override; - lldb_private::Status - CalculateFrameVariableError(lldb_private::StackFrame &frame) override; + Status CalculateFrameVariableError(StackFrame &frame) override; - void - FindGlobalVariables(lldb_private::ConstString name, - const lldb_private::CompilerDeclContext &parent_decl_ctx, - uint32_t max_matches, - lldb_private::VariableList &variables) override; - void FindGlobalVariables(const lldb_private::RegularExpression ®ex, + void FindGlobalVariables(ConstString name, + const CompilerDeclContext &parent_decl_ctx, uint32_t max_matches, - lldb_private::VariableList &variables) override; - void FindFunctions(const lldb_private::Module::LookupInfo &lookup_info, - const lldb_private::CompilerDeclContext &parent_decl_ctx, - bool include_inlines, - lldb_private::SymbolContextList &sc_list) override; - void FindFunctions(const lldb_private::RegularExpression ®ex, - bool include_inlines, - lldb_private::SymbolContextList &sc_list) override; - void - FindTypes(lldb_private::ConstString name, - const lldb_private::CompilerDeclContext &parent_decl_ctx, - uint32_t max_matches, - llvm::DenseSet &searched_symbol_files, - lldb_private::TypeMap &types) override; - void - FindTypes(llvm::ArrayRef context, - lldb_private::LanguageSet languages, - llvm::DenseSet &searched_symbol_files, - lldb_private::TypeMap &types) override; - lldb_private::CompilerDeclContext - FindNamespace(lldb_private::ConstString name, - const lldb_private::CompilerDeclContext &parent_decl_ctx, - bool only_root_namespaces) override; - void GetTypes(lldb_private::SymbolContextScope *sc_scope, - lldb::TypeClass type_mask, - lldb_private::TypeList &type_list) override; - std::vector> - ParseCallEdgesInFunction(lldb_private::UserID func_id) override; - - void DumpClangAST(lldb_private::Stream &s) override; + VariableList &variables) override; + void 
FindGlobalVariables(const RegularExpression ®ex, uint32_t max_matches, + VariableList &variables) override; + void FindFunctions(const Module::LookupInfo &lookup_info, + const CompilerDeclContext &parent_decl_ctx, + bool include_inlines, SymbolContextList &sc_list) override; + void FindFunctions(const RegularExpression ®ex, bool include_inlines, + SymbolContextList &sc_list) override; + void FindTypes(ConstString name, const CompilerDeclContext &parent_decl_ctx, + uint32_t max_matches, + llvm::DenseSet &searched_symbol_files, + TypeMap &types) override; + void FindTypes(llvm::ArrayRef context, LanguageSet languages, + llvm::DenseSet &searched_symbol_files, + TypeMap &types) override; + CompilerDeclContext FindNamespace(ConstString name, + const CompilerDeclContext &parent_decl_ctx, + bool only_root_namespaces) override; + void GetTypes(SymbolContextScope *sc_scope, lldb::TypeClass type_mask, + TypeList &type_list) override; + std::vector> + ParseCallEdgesInFunction(UserID func_id) override; + + void DumpClangAST(Stream &s) override; /// List separate oso files. - bool - GetSeparateDebugInfo(lldb_private::StructuredData::Dictionary &d) override; + bool GetSeparateDebugInfo(StructuredData::Dictionary &d) override; // PluginInterface protocol llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } // Statistics overrides. 
- lldb_private::ModuleList GetDebugInfoModules() override; + ModuleList GetDebugInfoModules() override; - void GetCompileOptions( - std::unordered_map &args) override; + void + GetCompileOptions(std::unordered_map &args) override; protected: enum { kHaveInitializedOSOs = (1 << 0), kNumFlags }; friend class DebugMapModule; - friend class DWARFASTParserClang; + friend class ::DWARFASTParserClang; friend class DWARFCompileUnit; friend class SymbolFileDWARF; struct OSOInfo { @@ -177,16 +160,15 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { typedef std::shared_ptr OSOInfoSP; - typedef lldb_private::RangeDataVector + typedef RangeDataVector FileRangeMap; // Class specific types struct CompileUnitInfo { - lldb_private::FileSpec so_file; - lldb_private::ConstString oso_path; + FileSpec so_file; + ConstString oso_path; llvm::sys::TimePoint<> oso_mod_time; - lldb_private::Status oso_load_error; + Status oso_load_error; OSOInfoSP oso_sp; /// The compile units that an object file contains. 
llvm::SmallVector compile_units_sps; @@ -228,28 +210,26 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { static SymbolFileDWARF *GetSymbolFileAsSymbolFileDWARF(SymbolFile *sym_file); - bool GetFileSpecForSO(uint32_t oso_idx, lldb_private::FileSpec &file_spec); + bool GetFileSpecForSO(uint32_t oso_idx, FileSpec &file_spec); - CompileUnitInfo *GetCompUnitInfo(const lldb_private::SymbolContext &sc); - CompileUnitInfo *GetCompUnitInfo(const lldb_private::CompileUnit &comp_unit); + CompileUnitInfo *GetCompUnitInfo(const SymbolContext &sc); + CompileUnitInfo *GetCompUnitInfo(const CompileUnit &comp_unit); - size_t GetCompUnitInfosForModule(const lldb_private::Module *oso_module, + size_t GetCompUnitInfosForModule(const Module *oso_module, std::vector &cu_infos); - lldb_private::Module * - GetModuleByCompUnitInfo(CompileUnitInfo *comp_unit_info); + Module *GetModuleByCompUnitInfo(CompileUnitInfo *comp_unit_info); - lldb_private::Module *GetModuleByOSOIndex(uint32_t oso_idx); + Module *GetModuleByOSOIndex(uint32_t oso_idx); - lldb_private::ObjectFile * - GetObjectFileByCompUnitInfo(CompileUnitInfo *comp_unit_info); + ObjectFile *GetObjectFileByCompUnitInfo(CompileUnitInfo *comp_unit_info); - lldb_private::ObjectFile *GetObjectFileByOSOIndex(uint32_t oso_idx); + ObjectFile *GetObjectFileByOSOIndex(uint32_t oso_idx); uint32_t GetCompUnitInfoIndex(const CompileUnitInfo *comp_unit_info); - SymbolFileDWARF *GetSymbolFile(const lldb_private::SymbolContext &sc); - SymbolFileDWARF *GetSymbolFile(const lldb_private::CompileUnit &comp_unit); + SymbolFileDWARF *GetSymbolFile(const SymbolContext &sc); + SymbolFileDWARF *GetSymbolFile(const CompileUnit &comp_unit); SymbolFileDWARF *GetSymbolFileByCompUnitInfo(CompileUnitInfo *comp_unit_info); @@ -280,11 +260,11 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { static int SymbolContainsSymbolWithID(lldb::user_id_t *symbol_idx_ptr, const CompileUnitInfo *comp_unit_info); - void 
PrivateFindGlobalVariables( - lldb_private::ConstString name, - const lldb_private::CompilerDeclContext &parent_decl_ctx, - const std::vector &name_symbol_indexes, uint32_t max_matches, - lldb_private::VariableList &variables); + void + PrivateFindGlobalVariables(ConstString name, + const CompilerDeclContext &parent_decl_ctx, + const std::vector &name_symbol_indexes, + uint32_t max_matches, VariableList &variables); void SetCompileUnit(SymbolFileDWARF *oso_dwarf, const lldb::CompUnitSP &cu_sp); @@ -302,8 +282,7 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { bool Supports_DW_AT_APPLE_objc_complete_type(SymbolFileDWARF *skip_dwarf_oso); lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( - const DWARFDIE &die, lldb_private::ConstString type_name, - bool must_be_implementation); + const DWARFDIE &die, ConstString type_name, bool must_be_implementation); UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() { return m_unique_ast_type_map; @@ -334,19 +313,16 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { lldb::addr_t m_oso_file_addr = LLDB_INVALID_ADDRESS; }; - typedef lldb_private::RangeDataVector - DebugMap; + typedef RangeDataVector DebugMap; // Member Variables std::bitset m_flags; std::vector m_compile_unit_infos; std::vector m_func_indexes; // Sorted by address std::vector m_glob_indexes; - std::map>, - OSOInfoSP> - m_oso_map; + std::map>, OSOInfoSP> m_oso_map; UniqueDWARFASTTypeMap m_unique_ast_type_map; - lldb_private::LazyBool m_supports_DW_AT_APPLE_objc_complete_type; + LazyBool m_supports_DW_AT_APPLE_objc_complete_type; DebugMap m_debug_map; // When an object file from the debug map gets parsed in @@ -370,7 +346,7 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { /// \return /// Returns true if \a addr was converted to be an executable /// section/offset address, false otherwise. 
- bool LinkOSOAddress(lldb_private::Address &addr); + bool LinkOSOAddress(Address &addr); /// Convert a .o file "file address" to an executable "file address". /// @@ -401,12 +377,13 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { /// Returns a valid line table full of linked addresses, or NULL /// if none of the line table addresses exist in the main /// executable. - lldb_private::LineTable * - LinkOSOLineTable(SymbolFileDWARF *oso_symfile, - lldb_private::LineTable *line_table); + LineTable *LinkOSOLineTable(SymbolFileDWARF *oso_symfile, + LineTable *line_table); size_t AddOSOARanges(SymbolFileDWARF *dwarf2Data, DWARFDebugAranges *debug_aranges); }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARFDEBUGMAP_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp index 78c3c19684e11..60313ca3a0f7b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp @@ -21,6 +21,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; char SymbolFileDWARFDwo::ID; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h index e98ea49d939ba..8408264c34453 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h @@ -12,6 +12,8 @@ #include "SymbolFileDWARF.h" #include +namespace lldb_private::plugin { +namespace dwarf { class SymbolFileDWARFDwo : public SymbolFileDWARF { /// LLVM RTTI support. 
static char ID; @@ -32,7 +34,7 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { DWARFCompileUnit *GetDWOCompileUnitForHash(uint64_t hash); - void GetObjCMethods(lldb_private::ConstString class_name, + void GetObjCMethods(ConstString class_name, llvm::function_ref callback) override; llvm::Expected @@ -41,15 +43,13 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { DWARFDIE GetDIE(const DIERef &die_ref) override; - lldb::offset_t - GetVendorDWARFOpcodeSize(const lldb_private::DataExtractor &data, - const lldb::offset_t data_offset, - const uint8_t op) const override; + lldb::offset_t GetVendorDWARFOpcodeSize(const DataExtractor &data, + const lldb::offset_t data_offset, + const uint8_t op) const override; - bool ParseVendorDWARFOpcode( - uint8_t op, const lldb_private::DataExtractor &opcodes, - lldb::offset_t &offset, - std::vector &stack) const override; + bool ParseVendorDWARFOpcode(uint8_t op, const DataExtractor &opcodes, + lldb::offset_t &offset, + std::vector &stack) const override; protected: DIEToTypePtr &GetDIEToType() override; @@ -65,9 +65,10 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { lldb::TypeSP FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die) override; - lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( - const DWARFDIE &die, lldb_private::ConstString type_name, - bool must_be_implementation) override; + lldb::TypeSP + FindCompleteObjCDefinitionTypeForDIE(const DWARFDIE &die, + ConstString type_name, + bool must_be_implementation) override; SymbolFileDWARF &GetBaseSymbolFile() const { return m_base_symbol_file; } @@ -77,5 +78,7 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { SymbolFileDWARF &m_base_symbol_file; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARFDWO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp index 22a921cf61389..223518f0ae824 100644 
--- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp @@ -11,6 +11,7 @@ #include "lldb/Core/Declaration.h" using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; bool UniqueDWARFASTTypeList::Find(const DWARFDIE &die, const lldb_private::Declaration &decl, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h index 0947d1e581c52..bf3cbae55e5c7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h @@ -16,13 +16,15 @@ #include "DWARFDIE.h" #include "lldb/Core/Declaration.h" +namespace lldb_private::plugin { +namespace dwarf { class UniqueDWARFASTType { public: // Constructors and Destructors UniqueDWARFASTType() : m_type_sp(), m_die(), m_declaration() {} UniqueDWARFASTType(lldb::TypeSP &type_sp, const DWARFDIE &die, - const lldb_private::Declaration &decl, int32_t byte_size) + const Declaration &decl, int32_t byte_size) : m_type_sp(type_sp), m_die(die), m_declaration(decl), m_byte_size(byte_size) {} @@ -44,7 +46,7 @@ class UniqueDWARFASTType { lldb::TypeSP m_type_sp; DWARFDIE m_die; - lldb_private::Declaration m_declaration; + Declaration m_declaration; int32_t m_byte_size = -1; }; @@ -60,7 +62,7 @@ class UniqueDWARFASTTypeList { m_collection.push_back(entry); } - bool Find(const DWARFDIE &die, const lldb_private::Declaration &decl, + bool Find(const DWARFDIE &die, const Declaration &decl, const int32_t byte_size, UniqueDWARFASTType &entry) const; protected: @@ -74,14 +76,12 @@ class UniqueDWARFASTTypeMap { ~UniqueDWARFASTTypeMap() = default; - void Insert(lldb_private::ConstString name, - const UniqueDWARFASTType &entry) { + void Insert(ConstString name, const UniqueDWARFASTType &entry) { m_collection[name.GetCString()].Append(entry); } - bool Find(lldb_private::ConstString name, const DWARFDIE &die, - const 
lldb_private::Declaration &decl, const int32_t byte_size, - UniqueDWARFASTType &entry) const { + bool Find(ConstString name, const DWARFDIE &die, const Declaration &decl, + const int32_t byte_size, UniqueDWARFASTType &entry) const { const char *unique_name_cstr = name.GetCString(); collection::const_iterator pos = m_collection.find(unique_name_cstr); if (pos != m_collection.end()) { @@ -95,5 +95,7 @@ class UniqueDWARFASTTypeMap { typedef llvm::DenseMap collection; collection m_collection; }; +} // namespace dwarf +} // namespace lldb_private::plugin #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_UNIQUEDWARFASTTYPE_H diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index ddfe5b1a7c52d..bcf4b62478068 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -86,6 +86,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; using namespace clang; using llvm::StringSwitch; diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 1d2f25c47b8c7..7805be92ec136 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -514,7 +514,7 @@ class TypeSystemClang : public TypeSystem { size_t bit_size); // TypeSystem methods - DWARFASTParser *GetDWARFParser() override; + plugin::dwarf::DWARFASTParser *GetDWARFParser() override; PDBASTParser *GetPDBParser() override; npdb::PdbAstBuilder *GetNativePDBParser() override; From 02d9f4d1f128e17e04ab6e602d3c9b9942612428 Mon Sep 17 00:00:00 2001 From: Devajith Date: Fri, 13 Oct 2023 14:03:27 -0700 Subject: [PATCH 107/720] [mlir][mlir-query] Introduce mlir-query tool with autocomplete support This commit adds the initial version of the mlir-query tool, which 
leverages the pre-existing matchers defined in mlir/include/mlir/IR/Matchers.h The tool provides the following set of basic queries: hasOpAttrName(string) hasOpName(string) isConstantOp() isNegInfFloat() isNegZeroFloat() isNonZero() isOne() isOneFloat() isPosInfFloat() isPosZeroFloat() isZero() isZeroFloat() Reviewed By: jpienaar Differential Revision: https://reviews.llvm.org/D155127 --- .../include/mlir/Query/Matcher/ErrorBuilder.h | 63 ++ mlir/include/mlir/Query/Matcher/Marshallers.h | 199 +++++++ mlir/include/mlir/Query/Matcher/MatchFinder.h | 41 ++ .../mlir/Query/Matcher/MatchersInternal.h | 72 +++ mlir/include/mlir/Query/Matcher/Registry.h | 51 ++ .../include/mlir/Query/Matcher/VariantValue.h | 128 +++++ mlir/include/mlir/Query/Query.h | 109 ++++ mlir/include/mlir/Query/QuerySession.h | 42 ++ .../mlir/Tools/mlir-query/MlirQueryMain.h | 30 + mlir/lib/CMakeLists.txt | 1 + mlir/lib/Query/CMakeLists.txt | 12 + mlir/lib/Query/Matcher/CMakeLists.txt | 10 + mlir/lib/Query/Matcher/Diagnostics.cpp | 128 +++++ mlir/lib/Query/Matcher/Diagnostics.h | 82 +++ mlir/lib/Query/Matcher/ErrorBuilder.cpp | 25 + mlir/lib/Query/Matcher/Parser.cpp | 540 ++++++++++++++++++ mlir/lib/Query/Matcher/Parser.h | 188 ++++++ mlir/lib/Query/Matcher/RegistryManager.cpp | 139 +++++ mlir/lib/Query/Matcher/RegistryManager.h | 70 +++ mlir/lib/Query/Matcher/VariantValue.cpp | 132 +++++ mlir/lib/Query/Query.cpp | 82 +++ mlir/lib/Query/QueryParser.cpp | 217 +++++++ mlir/lib/Query/QueryParser.h | 59 ++ mlir/lib/Tools/CMakeLists.txt | 1 + mlir/lib/Tools/mlir-query/CMakeLists.txt | 13 + mlir/lib/Tools/mlir-query/MlirQueryMain.cpp | 115 ++++ mlir/test/CMakeLists.txt | 1 + mlir/test/mlir-query/simple-test.mlir | 16 + mlir/tools/CMakeLists.txt | 1 + mlir/tools/mlir-query/CMakeLists.txt | 20 + mlir/tools/mlir-query/mlir-query.cpp | 63 ++ 31 files changed, 2650 insertions(+) create mode 100644 mlir/include/mlir/Query/Matcher/ErrorBuilder.h create mode 100644 mlir/include/mlir/Query/Matcher/Marshallers.h 
create mode 100644 mlir/include/mlir/Query/Matcher/MatchFinder.h create mode 100644 mlir/include/mlir/Query/Matcher/MatchersInternal.h create mode 100644 mlir/include/mlir/Query/Matcher/Registry.h create mode 100644 mlir/include/mlir/Query/Matcher/VariantValue.h create mode 100644 mlir/include/mlir/Query/Query.h create mode 100644 mlir/include/mlir/Query/QuerySession.h create mode 100644 mlir/include/mlir/Tools/mlir-query/MlirQueryMain.h create mode 100644 mlir/lib/Query/CMakeLists.txt create mode 100644 mlir/lib/Query/Matcher/CMakeLists.txt create mode 100644 mlir/lib/Query/Matcher/Diagnostics.cpp create mode 100644 mlir/lib/Query/Matcher/Diagnostics.h create mode 100644 mlir/lib/Query/Matcher/ErrorBuilder.cpp create mode 100644 mlir/lib/Query/Matcher/Parser.cpp create mode 100644 mlir/lib/Query/Matcher/Parser.h create mode 100644 mlir/lib/Query/Matcher/RegistryManager.cpp create mode 100644 mlir/lib/Query/Matcher/RegistryManager.h create mode 100644 mlir/lib/Query/Matcher/VariantValue.cpp create mode 100644 mlir/lib/Query/Query.cpp create mode 100644 mlir/lib/Query/QueryParser.cpp create mode 100644 mlir/lib/Query/QueryParser.h create mode 100644 mlir/lib/Tools/mlir-query/CMakeLists.txt create mode 100644 mlir/lib/Tools/mlir-query/MlirQueryMain.cpp create mode 100644 mlir/test/mlir-query/simple-test.mlir create mode 100644 mlir/tools/mlir-query/CMakeLists.txt create mode 100644 mlir/tools/mlir-query/mlir-query.cpp diff --git a/mlir/include/mlir/Query/Matcher/ErrorBuilder.h b/mlir/include/mlir/Query/Matcher/ErrorBuilder.h new file mode 100644 index 0000000000000..1073daed8703f --- /dev/null +++ b/mlir/include/mlir/Query/Matcher/ErrorBuilder.h @@ -0,0 +1,63 @@ +//===--- ErrorBuilder.h - Helper for building error messages ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// ErrorBuilder to manage error messages. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_ERRORBUILDER_H +#define MLIR_TOOLS_MLIRQUERY_MATCHER_ERRORBUILDER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include + +namespace mlir::query::matcher::internal { +class Diagnostics; + +// Represents the line and column numbers in a source query. +struct SourceLocation { + unsigned line{}; + unsigned column{}; +}; + +// Represents a range in a source query, defined by its start and end locations. +struct SourceRange { + SourceLocation start{}; + SourceLocation end{}; +}; + +// All errors from the system. +enum class ErrorType { + None, + + // Parser Errors + ParserFailedToBuildMatcher, + ParserInvalidToken, + ParserNoCloseParen, + ParserNoCode, + ParserNoComma, + ParserNoOpenParen, + ParserNotAMatcher, + ParserOverloadedType, + ParserStringError, + ParserTrailingCode, + + // Registry Errors + RegistryMatcherNotFound, + RegistryValueNotFound, + RegistryWrongArgCount, + RegistryWrongArgType +}; + +void addError(Diagnostics *error, SourceRange range, ErrorType errorType, + std::initializer_list errorTexts); + +} // namespace mlir::query::matcher::internal + +#endif // MLIR_TOOLS_MLIRQUERY_MATCHER_ERRORBUILDER_H diff --git a/mlir/include/mlir/Query/Matcher/Marshallers.h b/mlir/include/mlir/Query/Matcher/Marshallers.h new file mode 100644 index 0000000000000..6ed35ac0ddccc --- /dev/null +++ b/mlir/include/mlir/Query/Matcher/Marshallers.h @@ -0,0 +1,199 @@ +//===--- Marshallers.h - Generic matcher function marshallers ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains function templates and classes to wrap matcher construct +// functions. It provides a collection of template function and classes that +// present a generic marshalling layer on top of matcher construct functions. +// The registry uses these to export all marshaller constructors with a uniform +// interface. This mechanism takes inspiration from clang-query. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_MARSHALLERS_H +#define MLIR_TOOLS_MLIRQUERY_MATCHER_MARSHALLERS_H + +#include "ErrorBuilder.h" +#include "VariantValue.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" + +namespace mlir::query::matcher::internal { + +// Helper template class for jumping from argument type to the correct is/get +// functions in VariantValue. This is used for verifying and extracting the +// matcher arguments. +template +struct ArgTypeTraits; +template +struct ArgTypeTraits : public ArgTypeTraits {}; + +template <> +struct ArgTypeTraits { + + static bool hasCorrectType(const VariantValue &value) { + return value.isString(); + } + + static const llvm::StringRef &get(const VariantValue &value) { + return value.getString(); + } + + static ArgKind getKind() { return ArgKind::String; } + + static std::optional getBestGuess(const VariantValue &) { + return std::nullopt; + } +}; + +template <> +struct ArgTypeTraits { + + static bool hasCorrectType(const VariantValue &value) { + return value.isMatcher(); + } + + static DynMatcher get(const VariantValue &value) { + return *value.getMatcher().getDynMatcher(); + } + + static ArgKind getKind() { return ArgKind::Matcher; } + + static std::optional getBestGuess(const VariantValue &) { + return std::nullopt; + } +}; + +// Interface for generic matcher descriptor. 
+// Offers a create() method that constructs the matcher from the provided +// arguments. +class MatcherDescriptor { +public: + virtual ~MatcherDescriptor() = default; + virtual VariantMatcher create(SourceRange nameRange, + const llvm::ArrayRef args, + Diagnostics *error) const = 0; + + // Returns the number of arguments accepted by the matcher. + virtual unsigned getNumArgs() const = 0; + + // Append the set of argument types accepted for argument 'argNo' to + // 'argKinds'. + virtual void getArgKinds(unsigned argNo, + std::vector &argKinds) const = 0; +}; + +class FixedArgCountMatcherDescriptor : public MatcherDescriptor { +public: + using MarshallerType = VariantMatcher (*)(void (*matcherFunc)(), + llvm::StringRef matcherName, + SourceRange nameRange, + llvm::ArrayRef args, + Diagnostics *error); + + // Marshaller Function to unpack the arguments and call Func. Func is the + // Matcher construct function. This is the function that the matcher + // expressions would use to create the matcher. 
+ FixedArgCountMatcherDescriptor(MarshallerType marshaller, + void (*matcherFunc)(), + llvm::StringRef matcherName, + llvm::ArrayRef argKinds) + : marshaller(marshaller), matcherFunc(matcherFunc), + matcherName(matcherName), argKinds(argKinds.begin(), argKinds.end()) {} + + VariantMatcher create(SourceRange nameRange, llvm::ArrayRef args, + Diagnostics *error) const override { + return marshaller(matcherFunc, matcherName, nameRange, args, error); + } + + unsigned getNumArgs() const override { return argKinds.size(); } + + void getArgKinds(unsigned argNo, std::vector &kinds) const override { + kinds.push_back(argKinds[argNo]); + } + +private: + const MarshallerType marshaller; + void (*const matcherFunc)(); + const llvm::StringRef matcherName; + const std::vector argKinds; +}; + +// Helper function to check if argument count matches expected count +inline bool checkArgCount(SourceRange nameRange, size_t expectedArgCount, + llvm::ArrayRef args, + Diagnostics *error) { + if (args.size() != expectedArgCount) { + addError(error, nameRange, ErrorType::RegistryWrongArgCount, + {llvm::Twine(expectedArgCount), llvm::Twine(args.size())}); + return false; + } + return true; +} + +// Helper function for checking argument type +template +inline bool checkArgTypeAtIndex(llvm::StringRef matcherName, + llvm::ArrayRef args, + Diagnostics *error) { + if (!ArgTypeTraits::hasCorrectType(args[Index].value)) { + addError(error, args[Index].range, ErrorType::RegistryWrongArgType, + {llvm::Twine(matcherName), llvm::Twine(Index + 1)}); + return false; + } + return true; +} + +// Marshaller function for fixed number of arguments +template +static VariantMatcher +matcherMarshallFixedImpl(void (*matcherFunc)(), llvm::StringRef matcherName, + SourceRange nameRange, + llvm::ArrayRef args, Diagnostics *error, + std::index_sequence) { + using FuncType = ReturnType (*)(ArgTypes...); + + // Check if the argument count matches the expected count + if (!checkArgCount(nameRange, sizeof...(ArgTypes), 
args, error)) + return VariantMatcher(); + + // Check if each argument at the corresponding index has the correct type + if ((... && checkArgTypeAtIndex(matcherName, args, error))) { + ReturnType fnPointer = reinterpret_cast(matcherFunc)( + ArgTypeTraits::get(args[Is].value)...); + return VariantMatcher::SingleMatcher( + *DynMatcher::constructDynMatcherFromMatcherFn(fnPointer)); + } + + return VariantMatcher(); +} + +template +static VariantMatcher +matcherMarshallFixed(void (*matcherFunc)(), llvm::StringRef matcherName, + SourceRange nameRange, llvm::ArrayRef args, + Diagnostics *error) { + return matcherMarshallFixedImpl( + matcherFunc, matcherName, nameRange, args, error, + std::index_sequence_for{}); +} + +// Fixed number of arguments overload +template +std::unique_ptr +makeMatcherAutoMarshall(ReturnType (*matcherFunc)(ArgTypes...), + llvm::StringRef matcherName) { + // Create a vector of argument kinds + std::vector argKinds = {ArgTypeTraits::getKind()...}; + return std::make_unique( + matcherMarshallFixed, + reinterpret_cast(matcherFunc), matcherName, argKinds); +} + +} // namespace mlir::query::matcher::internal + +#endif // MLIR_TOOLS_MLIRQUERY_MATCHER_MARSHALLERS_H diff --git a/mlir/include/mlir/Query/Matcher/MatchFinder.h b/mlir/include/mlir/Query/Matcher/MatchFinder.h new file mode 100644 index 0000000000000..b008a21f53ae2 --- /dev/null +++ b/mlir/include/mlir/Query/Matcher/MatchFinder.h @@ -0,0 +1,41 @@ +//===- MatchFinder.h - ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the MatchFinder class, which is used to find operations +// that match a given matcher. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_MATCHERFINDER_H +#define MLIR_TOOLS_MLIRQUERY_MATCHER_MATCHERFINDER_H + +#include "MatchersInternal.h" + +namespace mlir::query::matcher { + +// MatchFinder is used to find all operations that match a given matcher. +class MatchFinder { +public: + // Returns all operations that match the given matcher. + static std::vector getMatches(Operation *root, + DynMatcher matcher) { + std::vector matches; + + // Simple match finding with walk. + root->walk([&](Operation *subOp) { + if (matcher.match(subOp)) + matches.push_back(subOp); + }); + + return matches; + } +}; + +} // namespace mlir::query::matcher + +#endif // MLIR_TOOLS_MLIRQUERY_MATCHER_MATCHERFINDER_H diff --git a/mlir/include/mlir/Query/Matcher/MatchersInternal.h b/mlir/include/mlir/Query/Matcher/MatchersInternal.h new file mode 100644 index 0000000000000..67455be592393 --- /dev/null +++ b/mlir/include/mlir/Query/Matcher/MatchersInternal.h @@ -0,0 +1,72 @@ +//===- MatchersInternal.h - Structural query framework ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements the base layer of the matcher framework. +// +// Matchers are methods that return a Matcher which provides a method +// match(Operation *op) +// +// The matcher functions are defined in include/mlir/IR/Matchers.h. +// This file contains the wrapper classes needed to construct matchers for +// mlir-query. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_MATCHERSINTERNAL_H +#define MLIR_TOOLS_MLIRQUERY_MATCHER_MATCHERSINTERNAL_H + +#include "mlir/IR/Matchers.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" + +namespace mlir::query::matcher { + +// Generic interface for matchers on an MLIR operation. +class MatcherInterface + : public llvm::ThreadSafeRefCountedBase { +public: + virtual ~MatcherInterface() = default; + + virtual bool match(Operation *op) = 0; +}; + +// MatcherFnImpl takes a matcher function object and implements +// MatcherInterface. +template +class MatcherFnImpl : public MatcherInterface { +public: + MatcherFnImpl(MatcherFn &matcherFn) : matcherFn(matcherFn) {} + bool match(Operation *op) override { return matcherFn.match(op); } + +private: + MatcherFn matcherFn; +}; + +// Matcher wraps a MatcherInterface implementation and provides a match() +// method that redirects calls to the underlying implementation. +class DynMatcher { +public: + // Takes ownership of the provided implementation pointer. 
+ DynMatcher(MatcherInterface *implementation) + : implementation(implementation) {} + + template + static std::unique_ptr + constructDynMatcherFromMatcherFn(MatcherFn &matcherFn) { + auto impl = std::make_unique>(matcherFn); + return std::make_unique(impl.release()); + } + + bool match(Operation *op) const { return implementation->match(op); } + +private: + llvm::IntrusiveRefCntPtr implementation; +}; + +} // namespace mlir::query::matcher + +#endif // MLIR_TOOLS_MLIRQUERY_MATCHER_MATCHERSINTERNAL_H diff --git a/mlir/include/mlir/Query/Matcher/Registry.h b/mlir/include/mlir/Query/Matcher/Registry.h new file mode 100644 index 0000000000000..e929b4a04d151 --- /dev/null +++ b/mlir/include/mlir/Query/Matcher/Registry.h @@ -0,0 +1,51 @@ +//===--- Registry.h - Matcher Registry --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Registry class to manage the registry of matchers using a map. +// +// This class provides a convenient interface for registering and accessing +// matcher constructors using a string-based map. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_REGISTRY_H +#define MLIR_TOOLS_MLIRQUERY_MATCHER_REGISTRY_H + +#include "Marshallers.h" +#include "llvm/ADT/StringMap.h" +#include + +namespace mlir::query::matcher { + +using ConstructorMap = + llvm::StringMap>; + +class Registry { +public: + Registry() = default; + ~Registry() = default; + + const ConstructorMap &constructors() const { return constructorMap; } + + template + void registerMatcher(const std::string &name, MatcherType matcher) { + registerMatcherDescriptor(name, + internal::makeMatcherAutoMarshall(matcher, name)); + } + +private: + void registerMatcherDescriptor( + llvm::StringRef matcherName, + std::unique_ptr callback); + + ConstructorMap constructorMap; +}; + +} // namespace mlir::query::matcher + +#endif // MLIR_TOOLS_MLIRQUERY_MATCHER_REGISTRY_H diff --git a/mlir/include/mlir/Query/Matcher/VariantValue.h b/mlir/include/mlir/Query/Matcher/VariantValue.h new file mode 100644 index 0000000000000..449f8b3a01e02 --- /dev/null +++ b/mlir/include/mlir/Query/Matcher/VariantValue.h @@ -0,0 +1,128 @@ +//===--- VariantValue.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Supports all the types required for dynamic Matcher construction. +// Used by the registry to construct matchers in a generic way. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_VARIANTVALUE_H +#define MLIR_TOOLS_MLIRQUERY_MATCHER_VARIANTVALUE_H + +#include "ErrorBuilder.h" +#include "MatchersInternal.h" +#include "llvm/ADT/StringRef.h" + +namespace mlir::query::matcher { + +// All types that VariantValue can contain. +enum class ArgKind { Matcher, String }; + +// A variant matcher object to abstract simple and complex matchers into a +// single object type. +class VariantMatcher { + class MatcherOps; + + // Payload interface to be specialized by each matcher type. It follows a + // similar interface as VariantMatcher itself. + class Payload { + public: + virtual ~Payload(); + virtual std::optional getDynMatcher() const = 0; + virtual std::string getTypeAsString() const = 0; + }; + +public: + // A null matcher. + VariantMatcher(); + + // Clones the provided matcher. + static VariantMatcher SingleMatcher(DynMatcher matcher); + + // Makes the matcher the "null" matcher. + void reset(); + + // Checks if the matcher is null. + bool isNull() const { return !value; } + + // Returns the matcher + std::optional getDynMatcher() const; + + // String representation of the type of the value. + std::string getTypeAsString() const; + +private: + explicit VariantMatcher(std::shared_ptr value) + : value(std::move(value)) {} + + class SinglePayload; + + std::shared_ptr value; +}; + +// Variant value class with a tagged union with value type semantics. It is used +// by the registry as the return value and argument type for the matcher factory +// methods. It can be constructed from any of the supported types: +// - StringRef +// - VariantMatcher +class VariantValue { +public: + VariantValue() : type(ValueType::Nothing) {} + + VariantValue(const VariantValue &other); + ~VariantValue(); + VariantValue &operator=(const VariantValue &other); + + // Specific constructors for each supported type. 
+ VariantValue(const llvm::StringRef string); + VariantValue(const VariantMatcher &matcher); + + // String value functions. + bool isString() const; + const llvm::StringRef &getString() const; + void setString(const llvm::StringRef &string); + + // Matcher value functions. + bool isMatcher() const; + const VariantMatcher &getMatcher() const; + void setMatcher(const VariantMatcher &matcher); + + // String representation of the type of the value. + std::string getTypeAsString() const; + +private: + void reset(); + + // All supported value types. + enum class ValueType { + Nothing, + String, + Matcher, + }; + + // All supported value types. + union AllValues { + llvm::StringRef *String; + VariantMatcher *Matcher; + }; + + ValueType type; + AllValues value; +}; + +// A VariantValue instance annotated with its parser context. +struct ParserValue { + ParserValue() {} + llvm::StringRef text; + internal::SourceRange range; + VariantValue value; +}; + +} // namespace mlir::query::matcher + +#endif // MLIR_TOOLS_MLIRQUERY_MATCHER_VARIANTVALUE_H diff --git a/mlir/include/mlir/Query/Query.h b/mlir/include/mlir/Query/Query.h new file mode 100644 index 0000000000000..447fc7ca21c8d --- /dev/null +++ b/mlir/include/mlir/Query/Query.h @@ -0,0 +1,109 @@ +//===--- Query.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_QUERY_H +#define MLIR_TOOLS_MLIRQUERY_QUERY_H + +#include "Matcher/VariantValue.h" +#include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/LineEditor/LineEditor.h" +#include + +namespace mlir::query { + +enum class QueryKind { Invalid, NoOp, Help, Match, Quit }; + +class QuerySession; + +struct Query : llvm::RefCountedBase { + Query(QueryKind kind) : kind(kind) {} + virtual ~Query(); + + // Perform the query on qs and print output to os. + virtual mlir::LogicalResult run(llvm::raw_ostream &os, + QuerySession &qs) const = 0; + + llvm::StringRef remainingContent; + const QueryKind kind; +}; + +typedef llvm::IntrusiveRefCntPtr QueryRef; + +QueryRef parse(llvm::StringRef line, const QuerySession &qs); + +std::vector +complete(llvm::StringRef line, size_t pos, const QuerySession &qs); + +// Any query which resulted in a parse error. The error message is in ErrStr. +struct InvalidQuery : Query { + InvalidQuery(const llvm::Twine &errStr) + : Query(QueryKind::Invalid), errStr(errStr.str()) {} + mlir::LogicalResult run(llvm::raw_ostream &os, + QuerySession &qs) const override; + + std::string errStr; + + static bool classof(const Query *query) { + return query->kind == QueryKind::Invalid; + } +}; + +// No-op query (i.e. a blank line). +struct NoOpQuery : Query { + NoOpQuery() : Query(QueryKind::NoOp) {} + mlir::LogicalResult run(llvm::raw_ostream &os, + QuerySession &qs) const override; + + static bool classof(const Query *query) { + return query->kind == QueryKind::NoOp; + } +}; + +// Query for "help". 
+struct HelpQuery : Query { + HelpQuery() : Query(QueryKind::Help) {} + mlir::LogicalResult run(llvm::raw_ostream &os, + QuerySession &qs) const override; + + static bool classof(const Query *query) { + return query->kind == QueryKind::Help; + } +}; + +// Query for "quit". +struct QuitQuery : Query { + QuitQuery() : Query(QueryKind::Quit) {} + mlir::LogicalResult run(llvm::raw_ostream &os, + QuerySession &qs) const override; + + static bool classof(const Query *query) { + return query->kind == QueryKind::Quit; + } +}; + +// Query for "match MATCHER". +struct MatchQuery : Query { + MatchQuery(llvm::StringRef source, const matcher::DynMatcher &matcher) + : Query(QueryKind::Match), matcher(matcher), source(source) {} + mlir::LogicalResult run(llvm::raw_ostream &os, + QuerySession &qs) const override; + + const matcher::DynMatcher matcher; + + llvm::StringRef source; + + static bool classof(const Query *query) { + return query->kind == QueryKind::Match; + } +}; + +} // namespace mlir::query + +#endif diff --git a/mlir/include/mlir/Query/QuerySession.h b/mlir/include/mlir/Query/QuerySession.h new file mode 100644 index 0000000000000..b03a8cae8f181 --- /dev/null +++ b/mlir/include/mlir/Query/QuerySession.h @@ -0,0 +1,42 @@ +//===--- QuerySession.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_QUERYSESSION_H +#define MLIR_TOOLS_MLIRQUERY_QUERYSESSION_H + +#include "llvm/ADT/StringMap.h" + +namespace mlir::query { + +class Registry; +// Represents the state for a particular mlir-query session. 
+class QuerySession { +public: + QuerySession(Operation *rootOp, llvm::SourceMgr &sourceMgr, unsigned bufferId, + const matcher::Registry &matcherRegistry) + : rootOp(rootOp), sourceMgr(sourceMgr), bufferId(bufferId), + matcherRegistry(matcherRegistry) {} + + Operation *getRootOp() { return rootOp; } + llvm::SourceMgr &getSourceManager() const { return sourceMgr; } + unsigned getBufferId() { return bufferId; } + const matcher::Registry &getRegistryData() const { return matcherRegistry; } + + llvm::StringMap namedValues; + bool terminate = false; + +private: + Operation *rootOp; + llvm::SourceMgr &sourceMgr; + unsigned bufferId; + const matcher::Registry &matcherRegistry; +}; + +} // namespace mlir::query + +#endif // MLIR_TOOLS_MLIRQUERY_QUERYSESSION_H diff --git a/mlir/include/mlir/Tools/mlir-query/MlirQueryMain.h b/mlir/include/mlir/Tools/mlir-query/MlirQueryMain.h new file mode 100644 index 0000000000000..fa1cd5d8176ee --- /dev/null +++ b/mlir/include/mlir/Tools/mlir-query/MlirQueryMain.h @@ -0,0 +1,30 @@ +//===- MlirQueryMain.h - MLIR Query main ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Main entry function for mlir-query for when built as standalone +// binary. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_MLIRQUERYMAIN_H +#define MLIR_TOOLS_MLIRQUERY_MLIRQUERYMAIN_H + +#include "mlir/Query/Matcher/Registry.h" +#include "mlir/Support/LogicalResult.h" + +namespace mlir { + +class MLIRContext; + +LogicalResult +mlirQueryMain(int argc, char **argv, MLIRContext &context, + const mlir::query::matcher::Registry &matcherRegistry); + +} // namespace mlir + +#endif // MLIR_TOOLS_MLIRQUERY_MLIRQUERYMAIN_H diff --git a/mlir/lib/CMakeLists.txt b/mlir/lib/CMakeLists.txt index c71664a3f0063..d25c84a3975db 100644 --- a/mlir/lib/CMakeLists.txt +++ b/mlir/lib/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(IR) add_subdirectory(Interfaces) add_subdirectory(Parser) add_subdirectory(Pass) +add_subdirectory(Query) add_subdirectory(Reducer) add_subdirectory(Rewrite) add_subdirectory(Support) diff --git a/mlir/lib/Query/CMakeLists.txt b/mlir/lib/Query/CMakeLists.txt new file mode 100644 index 0000000000000..817583e94c522 --- /dev/null +++ b/mlir/lib/Query/CMakeLists.txt @@ -0,0 +1,12 @@ +add_mlir_library(MLIRQuery + Query.cpp + QueryParser.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Query + + LINK_LIBS PUBLIC + MLIRQueryMatcher + ) + +add_subdirectory(Matcher) diff --git a/mlir/lib/Query/Matcher/CMakeLists.txt b/mlir/lib/Query/Matcher/CMakeLists.txt new file mode 100644 index 0000000000000..6afd24722bb70 --- /dev/null +++ b/mlir/lib/Query/Matcher/CMakeLists.txt @@ -0,0 +1,10 @@ +add_mlir_library(MLIRQueryMatcher + Parser.cpp + RegistryManager.cpp + VariantValue.cpp + Diagnostics.cpp + ErrorBuilder.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Query/Matcher + ) diff --git a/mlir/lib/Query/Matcher/Diagnostics.cpp b/mlir/lib/Query/Matcher/Diagnostics.cpp new file mode 100644 index 0000000000000..10468dbcc5306 --- /dev/null +++ b/mlir/lib/Query/Matcher/Diagnostics.cpp @@ -0,0 +1,128 @@ +//===- Diagnostic.cpp 
-----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Diagnostics.h" +#include "mlir/Query/Matcher/ErrorBuilder.h" + +namespace mlir::query::matcher::internal { + +Diagnostics::ArgStream & +Diagnostics::ArgStream::operator<<(const llvm::Twine &arg) { + out->push_back(arg.str()); + return *this; +} + +Diagnostics::ArgStream Diagnostics::addError(SourceRange range, + ErrorType error) { + errorValues.emplace_back(); + ErrorContent &last = errorValues.back(); + last.contextStack = contextStack; + last.messages.emplace_back(); + last.messages.back().range = range; + last.messages.back().type = error; + return ArgStream(&last.messages.back().args); +} + +static llvm::StringRef errorTypeToFormatString(ErrorType type) { + switch (type) { + case ErrorType::RegistryMatcherNotFound: + return "Matcher not found: $0"; + case ErrorType::RegistryWrongArgCount: + return "Incorrect argument count. (Expected = $0) != (Actual = $1)"; + case ErrorType::RegistryWrongArgType: + return "Incorrect type for arg $0. (Expected = $1) != (Actual = $2)"; + case ErrorType::RegistryValueNotFound: + return "Value not found: $0"; + + case ErrorType::ParserStringError: + return "Error parsing string token: <$0>"; + case ErrorType::ParserNoOpenParen: + return "Error parsing matcher. Found token <$0> while looking for '('."; + case ErrorType::ParserNoCloseParen: + return "Error parsing matcher. Found end-of-code while looking for ')'."; + case ErrorType::ParserNoComma: + return "Error parsing matcher. 
Found token <$0> while looking for ','."; + case ErrorType::ParserNoCode: + return "End of code found while looking for token."; + case ErrorType::ParserNotAMatcher: + return "Input value is not a matcher expression."; + case ErrorType::ParserInvalidToken: + return "Invalid token <$0> found when looking for a value."; + case ErrorType::ParserTrailingCode: + return "Unexpected end of code."; + case ErrorType::ParserOverloadedType: + return "Input value has unresolved overloaded type: $0"; + case ErrorType::ParserFailedToBuildMatcher: + return "Failed to build matcher: $0."; + + case ErrorType::None: + return ""; + } + llvm_unreachable("Unknown ErrorType value."); +} + +static void formatErrorString(llvm::StringRef formatString, + llvm::ArrayRef args, + llvm::raw_ostream &os) { + while (!formatString.empty()) { + std::pair pieces = + formatString.split("$"); + os << pieces.first.str(); + if (pieces.second.empty()) + break; + + const char next = pieces.second.front(); + formatString = pieces.second.drop_front(); + if (next >= '0' && next <= '9') { + const unsigned index = next - '0'; + if (index < args.size()) { + os << args[index]; + } else { + os << ""; + } + } + } +} + +static void maybeAddLineAndColumn(SourceRange range, llvm::raw_ostream &os) { + if (range.start.line > 0 && range.start.column > 0) { + os << range.start.line << ":" << range.start.column << ": "; + } +} + +void Diagnostics::printMessage( + const Diagnostics::ErrorContent::Message &message, const llvm::Twine prefix, + llvm::raw_ostream &os) const { + maybeAddLineAndColumn(message.range, os); + os << prefix; + formatErrorString(errorTypeToFormatString(message.type), message.args, os); +} + +void Diagnostics::printErrorContent(const Diagnostics::ErrorContent &content, + llvm::raw_ostream &os) const { + if (content.messages.size() == 1) { + printMessage(content.messages[0], "", os); + } else { + for (size_t i = 0, e = content.messages.size(); i != e; ++i) { + if (i != 0) + os << "\n"; + 
printMessage(content.messages[i], + "Candidate " + llvm::Twine(i + 1) + ": ", os); + } + } +} + +void Diagnostics::print(llvm::raw_ostream &os) const { + for (const ErrorContent &error : errorValues) { + if (&error != &errorValues.front()) + os << "\n"; + printErrorContent(error, os); + } +} + +} // namespace mlir::query::matcher::internal diff --git a/mlir/lib/Query/Matcher/Diagnostics.h b/mlir/lib/Query/Matcher/Diagnostics.h new file mode 100644 index 0000000000000..a58a435b16a90 --- /dev/null +++ b/mlir/lib/Query/Matcher/Diagnostics.h @@ -0,0 +1,82 @@ +//===--- Diagnostics.h - Helper class for error diagnostics -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Diagnostics class to manage error messages. Implementation shares similarity +// to clang-query Diagnostics. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_DIAGNOSTICS_H +#define MLIR_TOOLS_MLIRQUERY_MATCHER_DIAGNOSTICS_H + +#include "mlir/Query/Matcher/ErrorBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace mlir::query::matcher::internal { + +// Diagnostics class to manage error messages. +class Diagnostics { +public: + // Helper stream class for constructing error messages. + class ArgStream { + public: + ArgStream(std::vector *out) : out(out) {} + template + ArgStream &operator<<(const T &arg) { + return operator<<(llvm::Twine(arg)); + } + ArgStream &operator<<(const llvm::Twine &arg); + + private: + std::vector *out; + }; + + // Add an error message with the specified range and error type. 
+ // Returns an ArgStream object to allow constructing the error message using + // the << operator. + ArgStream addError(SourceRange range, ErrorType error); + + // Print all error messages to the specified output stream. + void print(llvm::raw_ostream &os) const; + +private: + // Information stored for one frame of the context. + struct ContextFrame { + SourceRange range; + std::vector args; + }; + + // Information stored for each error found. + struct ErrorContent { + std::vector contextStack; + struct Message { + SourceRange range; + ErrorType type; + std::vector args; + }; + std::vector messages; + }; + + void printMessage(const ErrorContent::Message &message, + const llvm::Twine Prefix, llvm::raw_ostream &os) const; + + void printErrorContent(const ErrorContent &content, + llvm::raw_ostream &os) const; + + std::vector contextStack; + std::vector errorValues; +}; + +} // namespace mlir::query::matcher::internal + +#endif // MLIR_TOOLS_MLIRQUERY_MATCHER_DIAGNOSTICS_H diff --git a/mlir/lib/Query/Matcher/ErrorBuilder.cpp b/mlir/lib/Query/Matcher/ErrorBuilder.cpp new file mode 100644 index 0000000000000..de6447dac490a --- /dev/null +++ b/mlir/lib/Query/Matcher/ErrorBuilder.cpp @@ -0,0 +1,25 @@ +//===--- ErrorBuilder.cpp - Helper for building error messages ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Query/Matcher/ErrorBuilder.h" +#include "Diagnostics.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include + +namespace mlir::query::matcher::internal { + +void addError(Diagnostics *error, SourceRange range, ErrorType errorType, + std::initializer_list errorTexts) { + Diagnostics::ArgStream argStream = error->addError(range, errorType); + for (const llvm::Twine &errorText : errorTexts) { + argStream << errorText; + } +} + +} // namespace mlir::query::matcher::internal diff --git a/mlir/lib/Query/Matcher/Parser.cpp b/mlir/lib/Query/Matcher/Parser.cpp new file mode 100644 index 0000000000000..be9e60de221db --- /dev/null +++ b/mlir/lib/Query/Matcher/Parser.cpp @@ -0,0 +1,540 @@ +//===- Parser.cpp - Matcher expression parser -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Recursive parser implementation for the matcher expression grammar. +// +//===----------------------------------------------------------------------===// + +#include "Parser.h" + +#include + +namespace mlir::query::matcher::internal { + +// Simple structure to hold information for one token from the parser. 
+struct Parser::TokenInfo { + TokenInfo() = default; + + // Method to set the kind and text of the token + void set(TokenKind newKind, llvm::StringRef newText) { + kind = newKind; + text = newText; + } + + llvm::StringRef text; + TokenKind kind = TokenKind::Eof; + SourceRange range; + VariantValue value; +}; + +class Parser::CodeTokenizer { +public: + // Constructor with matcherCode and error + explicit CodeTokenizer(llvm::StringRef matcherCode, Diagnostics *error) + : code(matcherCode), startOfLine(matcherCode), error(error) { + nextToken = getNextToken(); + } + + // Constructor with matcherCode, error, and codeCompletionOffset + CodeTokenizer(llvm::StringRef matcherCode, Diagnostics *error, + unsigned codeCompletionOffset) + : code(matcherCode), startOfLine(matcherCode), error(error), + codeCompletionLocation(matcherCode.data() + codeCompletionOffset) { + nextToken = getNextToken(); + } + + // Peek at next token without consuming it + const TokenInfo &peekNextToken() const { return nextToken; } + + // Consume and return the next token + TokenInfo consumeNextToken() { + TokenInfo thisToken = nextToken; + nextToken = getNextToken(); + return thisToken; + } + + // Skip any newline tokens + TokenInfo skipNewlines() { + while (nextToken.kind == TokenKind::NewLine) + nextToken = getNextToken(); + return nextToken; + } + + // Consume and return next token, ignoring newlines + TokenInfo consumeNextTokenIgnoreNewlines() { + skipNewlines(); + return nextToken.kind == TokenKind::Eof ? 
nextToken : consumeNextToken(); + } + + // Return kind of next token + TokenKind nextTokenKind() const { return nextToken.kind; } + +private: + // Helper function to get the first character as a new StringRef and drop it + // from the original string + llvm::StringRef firstCharacterAndDrop(llvm::StringRef &str) { + assert(!str.empty()); + llvm::StringRef firstChar = str.substr(0, 1); + str = str.drop_front(); + return firstChar; + } + + // Get next token, consuming whitespaces and handling different token types + TokenInfo getNextToken() { + consumeWhitespace(); + TokenInfo result; + result.range.start = currentLocation(); + + // Code completion case + if (codeCompletionLocation && codeCompletionLocation <= code.data()) { + result.set(TokenKind::CodeCompletion, + llvm::StringRef(codeCompletionLocation, 0)); + codeCompletionLocation = nullptr; + return result; + } + + // End of file case + if (code.empty()) { + result.set(TokenKind::Eof, ""); + return result; + } + + // Switch to handle specific characters + switch (code[0]) { + case '#': + code = code.drop_until([](char c) { return c == '\n'; }); + return getNextToken(); + case ',': + result.set(TokenKind::Comma, firstCharacterAndDrop(code)); + break; + case '.': + result.set(TokenKind::Period, firstCharacterAndDrop(code)); + break; + case '\n': + ++line; + startOfLine = code.drop_front(); + result.set(TokenKind::NewLine, firstCharacterAndDrop(code)); + break; + case '(': + result.set(TokenKind::OpenParen, firstCharacterAndDrop(code)); + break; + case ')': + result.set(TokenKind::CloseParen, firstCharacterAndDrop(code)); + break; + case '"': + case '\'': + consumeStringLiteral(&result); + break; + default: + parseIdentifierOrInvalid(&result); + break; + } + + result.range.end = currentLocation(); + return result; + } + + // Consume a string literal, handle escape sequences and missing closing + // quote. 
+ void consumeStringLiteral(TokenInfo *result) { + bool inEscape = false; + const char marker = code[0]; + for (size_t length = 1; length < code.size(); ++length) { + if (inEscape) { + inEscape = false; + continue; + } + if (code[length] == '\\') { + inEscape = true; + continue; + } + if (code[length] == marker) { + result->kind = TokenKind::Literal; + result->text = code.substr(0, length + 1); + result->value = code.substr(1, length - 1); + code = code.drop_front(length + 1); + return; + } + } + llvm::StringRef errorText = code; + code = code.drop_front(code.size()); + SourceRange range; + range.start = result->range.start; + range.end = currentLocation(); + error->addError(range, ErrorType::ParserStringError) << errorText; + result->kind = TokenKind::Error; + } + + void parseIdentifierOrInvalid(TokenInfo *result) { + if (isalnum(code[0])) { + // Parse an identifier + size_t tokenLength = 1; + + while (true) { + // A code completion location in/immediately after an identifier will + // cause the portion of the identifier before the code completion + // location to become a code completion token. 
+ if (codeCompletionLocation == code.data() + tokenLength) { + codeCompletionLocation = nullptr; + result->kind = TokenKind::CodeCompletion; + result->text = code.substr(0, tokenLength); + code = code.drop_front(tokenLength); + return; + } + if (tokenLength == code.size() || !(isalnum(code[tokenLength]))) + break; + ++tokenLength; + } + result->kind = TokenKind::Ident; + result->text = code.substr(0, tokenLength); + code = code.drop_front(tokenLength); + } else { + result->kind = TokenKind::InvalidChar; + result->text = code.substr(0, 1); + code = code.drop_front(1); + } + } + + // Consume all leading whitespace from code, except newlines + void consumeWhitespace() { + code = code.drop_while( + [](char c) { return llvm::StringRef(" \t\v\f\r").contains(c); }); + } + + // Returns the current location in the source code + SourceLocation currentLocation() { + SourceLocation location; + location.line = line; + location.column = code.data() - startOfLine.data() + 1; + return location; + } + + llvm::StringRef code; + llvm::StringRef startOfLine; + unsigned line = 1; + Diagnostics *error; + TokenInfo nextToken; + const char *codeCompletionLocation = nullptr; +}; + +Parser::Sema::~Sema() = default; + +std::vector Parser::Sema::getAcceptedCompletionTypes( + llvm::ArrayRef> context) { + return {}; +} + +std::vector +Parser::Sema::getMatcherCompletions(llvm::ArrayRef acceptedTypes) { + return {}; +} + +// Entry for the scope of a parser +struct Parser::ScopedContextEntry { + Parser *parser; + + ScopedContextEntry(Parser *parser, MatcherCtor c) : parser(parser) { + parser->contextStack.emplace_back(c, 0u); + } + + ~ScopedContextEntry() { parser->contextStack.pop_back(); } + + void nextArg() { ++parser->contextStack.back().second; } +}; + +// Parse and validate expressions starting with an identifier. +// This function can parse named values and matchers. In case of failure, it +// will try to determine the user's intent to give an appropriate error message. 
+bool Parser::parseIdentifierPrefixImpl(VariantValue *value) { + const TokenInfo nameToken = tokenizer->consumeNextToken(); + + if (tokenizer->nextTokenKind() != TokenKind::OpenParen) { + // Parse as a named value. + auto namedValue = + namedValues ? namedValues->lookup(nameToken.text) : VariantValue(); + + if (!namedValue.isMatcher()) { + error->addError(tokenizer->peekNextToken().range, + ErrorType::ParserNotAMatcher); + return false; + } + + if (tokenizer->nextTokenKind() == TokenKind::NewLine) { + error->addError(tokenizer->peekNextToken().range, + ErrorType::ParserNoOpenParen) + << "NewLine"; + return false; + } + + // If the syntax is correct and the name is not a matcher either, report + // an unknown named value. + if ((tokenizer->nextTokenKind() == TokenKind::Comma || + tokenizer->nextTokenKind() == TokenKind::CloseParen || + tokenizer->nextTokenKind() == TokenKind::NewLine || + tokenizer->nextTokenKind() == TokenKind::Eof) && + !sema->lookupMatcherCtor(nameToken.text)) { + error->addError(nameToken.range, ErrorType::RegistryValueNotFound) + << nameToken.text; + return false; + } + // Otherwise, fallback to the matcher parser. + } + + tokenizer->skipNewlines(); + + assert(nameToken.kind == TokenKind::Ident); + TokenInfo openToken = tokenizer->consumeNextToken(); + if (openToken.kind != TokenKind::OpenParen) { + error->addError(openToken.range, ErrorType::ParserNoOpenParen) + << openToken.text; + return false; + } + + std::optional ctor = sema->lookupMatcherCtor(nameToken.text); + + // Parse as a matcher expression. + return parseMatcherExpressionImpl(nameToken, openToken, ctor, value); +} + +// Parse the arguments of a matcher +bool Parser::parseMatcherArgs(std::vector &args, MatcherCtor ctor, + const TokenInfo &nameToken, TokenInfo &endToken) { + ScopedContextEntry sce(this, ctor); + + while (tokenizer->nextTokenKind() != TokenKind::Eof) { + if (tokenizer->nextTokenKind() == TokenKind::CloseParen) { + // end of args. 
+ endToken = tokenizer->consumeNextToken(); + break; + } + + if (!args.empty()) { + // We must find a , token to continue. + TokenInfo commaToken = tokenizer->consumeNextToken(); + if (commaToken.kind != TokenKind::Comma) { + error->addError(commaToken.range, ErrorType::ParserNoComma) + << commaToken.text; + return false; + } + } + + ParserValue argValue; + tokenizer->skipNewlines(); + + argValue.text = tokenizer->peekNextToken().text; + argValue.range = tokenizer->peekNextToken().range; + if (!parseExpressionImpl(&argValue.value)) { + return false; + } + + tokenizer->skipNewlines(); + args.push_back(argValue); + sce.nextArg(); + } + + return true; +} + +// Parse and validate a matcher expression. +bool Parser::parseMatcherExpressionImpl(const TokenInfo &nameToken, + const TokenInfo &openToken, + std::optional ctor, + VariantValue *value) { + if (!ctor) { + error->addError(nameToken.range, ErrorType::RegistryMatcherNotFound) + << nameToken.text; + // Do not return here. We need to continue to give completion suggestions. + } + + std::vector args; + TokenInfo endToken; + + tokenizer->skipNewlines(); + + if (!parseMatcherArgs(args, ctor.value_or(nullptr), nameToken, endToken)) { + return false; + } + + // Check for the missing closing parenthesis + if (endToken.kind != TokenKind::CloseParen) { + error->addError(openToken.range, ErrorType::ParserNoCloseParen) + << nameToken.text; + return false; + } + + if (!ctor) + return false; + // Merge the start and end infos. + SourceRange matcherRange = nameToken.range; + matcherRange.end = endToken.range.end; + VariantMatcher result = + sema->actOnMatcherExpression(*ctor, matcherRange, args, error); + if (result.isNull()) + return false; + *value = result; + return true; +} + +// If the prefix of this completion matches the completion token, add it to +// completions minus the prefix. 
+void Parser::addCompletion(const TokenInfo &compToken, + const MatcherCompletion &completion) { + if (llvm::StringRef(completion.typedText).startswith(compToken.text)) { + completions.emplace_back(completion.typedText.substr(compToken.text.size()), + completion.matcherDecl); + } +} + +std::vector +Parser::getNamedValueCompletions(llvm::ArrayRef acceptedTypes) { + if (!namedValues) + return {}; + + std::vector result; + for (const auto &entry : *namedValues) { + std::string decl = + (entry.getValue().getTypeAsString() + " " + entry.getKey()).str(); + result.emplace_back(entry.getKey(), decl); + } + return result; +} + +void Parser::addExpressionCompletions() { + const TokenInfo compToken = tokenizer->consumeNextTokenIgnoreNewlines(); + assert(compToken.kind == TokenKind::CodeCompletion); + + // We cannot complete code if there is an invalid element on the context + // stack. + for (const auto &entry : contextStack) { + if (!entry.first) + return; + } + + auto acceptedTypes = sema->getAcceptedCompletionTypes(contextStack); + for (const auto &completion : sema->getMatcherCompletions(acceptedTypes)) { + addCompletion(compToken, completion); + } + + for (const auto &completion : getNamedValueCompletions(acceptedTypes)) { + addCompletion(compToken, completion); + } +} + +// Parse an +bool Parser::parseExpressionImpl(VariantValue *value) { + switch (tokenizer->nextTokenKind()) { + case TokenKind::Literal: + *value = tokenizer->consumeNextToken().value; + return true; + case TokenKind::Ident: + return parseIdentifierPrefixImpl(value); + case TokenKind::CodeCompletion: + addExpressionCompletions(); + return false; + case TokenKind::Eof: + error->addError(tokenizer->consumeNextToken().range, + ErrorType::ParserNoCode); + return false; + + case TokenKind::Error: + // This error was already reported by the tokenizer. 
+ return false; + case TokenKind::NewLine: + case TokenKind::OpenParen: + case TokenKind::CloseParen: + case TokenKind::Comma: + case TokenKind::Period: + case TokenKind::InvalidChar: + const TokenInfo token = tokenizer->consumeNextToken(); + error->addError(token.range, ErrorType::ParserInvalidToken) + << (token.kind == TokenKind::NewLine ? "NewLine" : token.text); + return false; + } + + llvm_unreachable("Unknown token kind."); +} + +Parser::Parser(CodeTokenizer *tokenizer, const Registry &matcherRegistry, + const NamedValueMap *namedValues, Diagnostics *error) + : tokenizer(tokenizer), + sema(std::make_unique(matcherRegistry)), + namedValues(namedValues), error(error) {} + +Parser::RegistrySema::~RegistrySema() = default; + +std::optional +Parser::RegistrySema::lookupMatcherCtor(llvm::StringRef matcherName) { + return RegistryManager::lookupMatcherCtor(matcherName, matcherRegistry); +} + +VariantMatcher Parser::RegistrySema::actOnMatcherExpression( + MatcherCtor ctor, SourceRange nameRange, llvm::ArrayRef args, + Diagnostics *error) { + return RegistryManager::constructMatcher(ctor, nameRange, args, error); +} + +std::vector Parser::RegistrySema::getAcceptedCompletionTypes( + llvm::ArrayRef> context) { + return RegistryManager::getAcceptedCompletionTypes(context); +} + +std::vector Parser::RegistrySema::getMatcherCompletions( + llvm::ArrayRef acceptedTypes) { + return RegistryManager::getMatcherCompletions(acceptedTypes, matcherRegistry); +} + +bool Parser::parseExpression(llvm::StringRef &code, + const Registry &matcherRegistry, + const NamedValueMap *namedValues, + VariantValue *value, Diagnostics *error) { + CodeTokenizer tokenizer(code, error); + Parser parser(&tokenizer, matcherRegistry, namedValues, error); + if (!parser.parseExpressionImpl(value)) + return false; + auto nextToken = tokenizer.peekNextToken(); + if (nextToken.kind != TokenKind::Eof && + nextToken.kind != TokenKind::NewLine) { + error->addError(tokenizer.peekNextToken().range, + 
ErrorType::ParserTrailingCode); + return false; + } + return true; +} + +std::vector +Parser::completeExpression(llvm::StringRef &code, unsigned completionOffset, + const Registry &matcherRegistry, + const NamedValueMap *namedValues) { + Diagnostics error; + CodeTokenizer tokenizer(code, &error, completionOffset); + Parser parser(&tokenizer, matcherRegistry, namedValues, &error); + VariantValue dummy; + parser.parseExpressionImpl(&dummy); + + return parser.completions; +} + +std::optional Parser::parseMatcherExpression( + llvm::StringRef &code, const Registry &matcherRegistry, + const NamedValueMap *namedValues, Diagnostics *error) { + VariantValue value; + if (!parseExpression(code, matcherRegistry, namedValues, &value, error)) + return std::nullopt; + if (!value.isMatcher()) { + error->addError(SourceRange(), ErrorType::ParserNotAMatcher); + return std::nullopt; + } + std::optional result = value.getMatcher().getDynMatcher(); + if (!result) { + error->addError(SourceRange(), ErrorType::ParserOverloadedType) + << value.getTypeAsString(); + } + return result; +} + +} // namespace mlir::query::matcher::internal diff --git a/mlir/lib/Query/Matcher/Parser.h b/mlir/lib/Query/Matcher/Parser.h new file mode 100644 index 0000000000000..f049af34e9c90 --- /dev/null +++ b/mlir/lib/Query/Matcher/Parser.h @@ -0,0 +1,188 @@ +//===--- Parser.h - Matcher expression parser -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Simple matcher expression parser. +// +// This file contains the Parser class, which is responsible for parsing +// expressions in a specific format: matcherName(Arg0, Arg1, ..., ArgN). The +// parser can also interpret simple types, like strings. 
+// +// The actual processing of the matchers is handled by a Sema object that is +// provided to the parser. +// +// The grammar for the supported expressions is as follows: +// := | +// := "quoted string" +// := () +// := [a-zA-Z]+ +// := | , +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H +#define MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H + +#include "Diagnostics.h" +#include "RegistryManager.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include +#include + +namespace mlir::query::matcher::internal { + +// Matcher expression parser. +class Parser { +public: + // Different possible tokens. + enum class TokenKind { + Eof, + NewLine, + OpenParen, + CloseParen, + Comma, + Period, + Literal, + Ident, + InvalidChar, + CodeCompletion, + Error + }; + + // Interface to connect the parser with the registry and more. The parser uses + // the Sema instance passed into parseMatcherExpression() to handle all + // matcher tokens. + class Sema { + public: + virtual ~Sema(); + + // Process a matcher expression. The caller takes ownership of the Matcher + // object returned. + virtual VariantMatcher + actOnMatcherExpression(MatcherCtor ctor, SourceRange nameRange, + llvm::ArrayRef args, + Diagnostics *error) = 0; + + // Look up a matcher by name in the matcher name found by the parser. + virtual std::optional + lookupMatcherCtor(llvm::StringRef matcherName) = 0; + + // Compute the list of completion types for Context. + virtual std::vector getAcceptedCompletionTypes( + llvm::ArrayRef> Context); + + // Compute the list of completions that match any of acceptedTypes. + virtual std::vector + getMatcherCompletions(llvm::ArrayRef acceptedTypes); + }; + + // An implementation of the Sema interface that uses the matcher registry to + // process tokens. 
+ class RegistrySema : public Parser::Sema { + public: + RegistrySema(const Registry &matcherRegistry) + : matcherRegistry(matcherRegistry) {} + ~RegistrySema() override; + + std::optional + lookupMatcherCtor(llvm::StringRef matcherName) override; + + VariantMatcher actOnMatcherExpression(MatcherCtor ctor, + SourceRange nameRange, + llvm::ArrayRef args, + Diagnostics *error) override; + + std::vector getAcceptedCompletionTypes( + llvm::ArrayRef> context) override; + + std::vector + getMatcherCompletions(llvm::ArrayRef acceptedTypes) override; + + private: + const Registry &matcherRegistry; + }; + + using NamedValueMap = llvm::StringMap; + + // Methods to parse a matcher expression and return a DynMatcher object, + // transferring ownership to the caller. + static std::optional + parseMatcherExpression(llvm::StringRef &matcherCode, + const Registry &matcherRegistry, + const NamedValueMap *namedValues, Diagnostics *error); + static std::optional + parseMatcherExpression(llvm::StringRef &matcherCode, + const Registry &matcherRegistry, Diagnostics *error) { + return parseMatcherExpression(matcherCode, matcherRegistry, nullptr, error); + } + + // Methods to parse any expression supported by this parser. + static bool parseExpression(llvm::StringRef &code, + const Registry &matcherRegistry, + const NamedValueMap *namedValues, + VariantValue *value, Diagnostics *error); + + static bool parseExpression(llvm::StringRef &code, + const Registry &matcherRegistry, + VariantValue *value, Diagnostics *error) { + return parseExpression(code, matcherRegistry, nullptr, value, error); + } + + // Methods to complete an expression at a given offset. 
+ static std::vector + completeExpression(llvm::StringRef &code, unsigned completionOffset, + const Registry &matcherRegistry, + const NamedValueMap *namedValues); + static std::vector + completeExpression(llvm::StringRef &code, unsigned completionOffset, + const Registry &matcherRegistry) { + return completeExpression(code, completionOffset, matcherRegistry, nullptr); + } + +private: + class CodeTokenizer; + struct ScopedContextEntry; + struct TokenInfo; + + Parser(CodeTokenizer *tokenizer, const Registry &matcherRegistry, + const NamedValueMap *namedValues, Diagnostics *error); + + bool parseExpressionImpl(VariantValue *value); + + bool parseMatcherArgs(std::vector &args, MatcherCtor ctor, + const TokenInfo &nameToken, TokenInfo &endToken); + + bool parseMatcherExpressionImpl(const TokenInfo &nameToken, + const TokenInfo &openToken, + std::optional ctor, + VariantValue *value); + + bool parseIdentifierPrefixImpl(VariantValue *value); + + void addCompletion(const TokenInfo &compToken, + const MatcherCompletion &completion); + void addExpressionCompletions(); + + std::vector + getNamedValueCompletions(llvm::ArrayRef acceptedTypes); + + CodeTokenizer *const tokenizer; + std::unique_ptr sema; + const NamedValueMap *const namedValues; + Diagnostics *const error; + + using ContextStackTy = std::vector>; + + ContextStackTy contextStack; + std::vector completions; +}; + +} // namespace mlir::query::matcher::internal + +#endif // MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H diff --git a/mlir/lib/Query/Matcher/RegistryManager.cpp b/mlir/lib/Query/Matcher/RegistryManager.cpp new file mode 100644 index 0000000000000..01856aa8ffa67 --- /dev/null +++ b/mlir/lib/Query/Matcher/RegistryManager.cpp @@ -0,0 +1,139 @@ +//===- RegistryManager.cpp - Matcher registry -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Registry map populated at static initialization time. +// +//===----------------------------------------------------------------------===// + +#include "RegistryManager.h" +#include "mlir/Query/Matcher/Registry.h" + +#include +#include + +namespace mlir::query::matcher { +namespace { + +// This is needed because these matchers are defined as overloaded functions. +using IsConstantOp = detail::constant_op_matcher(); +using HasOpAttrName = detail::AttrOpMatcher(llvm::StringRef); +using HasOpName = detail::NameOpMatcher(llvm::StringRef); + +// Enum to string for autocomplete. +static std::string asArgString(ArgKind kind) { + switch (kind) { + case ArgKind::Matcher: + return "Matcher"; + case ArgKind::String: + return "String"; + } + llvm_unreachable("Unhandled ArgKind"); +} + +} // namespace + +void Registry::registerMatcherDescriptor( + llvm::StringRef matcherName, + std::unique_ptr callback) { + assert(!constructorMap.contains(matcherName)); + constructorMap[matcherName] = std::move(callback); +} + +std::optional +RegistryManager::lookupMatcherCtor(llvm::StringRef matcherName, + const Registry &matcherRegistry) { + auto it = matcherRegistry.constructors().find(matcherName); + return it == matcherRegistry.constructors().end() + ? std::optional() + : it->second.get(); +} + +std::vector RegistryManager::getAcceptedCompletionTypes( + llvm::ArrayRef> context) { + // Starting with the above seed of acceptable top-level matcher types, compute + // the acceptable type set for the argument indicated by each context element. 
+ std::set typeSet; + typeSet.insert(ArgKind::Matcher); + + for (const auto &ctxEntry : context) { + MatcherCtor ctor = ctxEntry.first; + unsigned argNumber = ctxEntry.second; + std::vector nextTypeSet; + + if (argNumber < ctor->getNumArgs()) + ctor->getArgKinds(argNumber, nextTypeSet); + + typeSet.insert(nextTypeSet.begin(), nextTypeSet.end()); + } + + return std::vector(typeSet.begin(), typeSet.end()); +} + +std::vector +RegistryManager::getMatcherCompletions(llvm::ArrayRef acceptedTypes, + const Registry &matcherRegistry) { + std::vector completions; + + // Search the registry for acceptable matchers. + for (const auto &m : matcherRegistry.constructors()) { + const internal::MatcherDescriptor &matcher = *m.getValue(); + llvm::StringRef name = m.getKey(); + + unsigned numArgs = matcher.getNumArgs(); + std::vector> argKinds(numArgs); + + for (const ArgKind &kind : acceptedTypes) { + if (kind != ArgKind::Matcher) + continue; + + for (unsigned arg = 0; arg != numArgs; ++arg) + matcher.getArgKinds(arg, argKinds[arg]); + } + + std::string decl; + llvm::raw_string_ostream os(decl); + + std::string typedText = std::string(name); + os << "Matcher: " << name << "("; + + for (const std::vector &arg : argKinds) { + if (&arg != &argKinds[0]) + os << ", "; + + bool firstArgKind = true; + // Two steps. First all non-matchers, then matchers only. 
+ for (const ArgKind &argKind : arg) { + if (!firstArgKind) + os << "|"; + + firstArgKind = false; + os << asArgString(argKind); + } + } + + os << ")"; + typedText += "("; + + if (argKinds.empty()) + typedText += ")"; + else if (argKinds[0][0] == ArgKind::String) + typedText += "\""; + + completions.emplace_back(typedText, os.str()); + } + + return completions; +} + +VariantMatcher RegistryManager::constructMatcher( + MatcherCtor ctor, internal::SourceRange nameRange, + llvm::ArrayRef args, internal::Diagnostics *error) { + return ctor->create(nameRange, args, error); +} + +} // namespace mlir::query::matcher diff --git a/mlir/lib/Query/Matcher/RegistryManager.h b/mlir/lib/Query/Matcher/RegistryManager.h new file mode 100644 index 0000000000000..5f2867261225e --- /dev/null +++ b/mlir/lib/Query/Matcher/RegistryManager.h @@ -0,0 +1,70 @@ +//===--- RegistryManager.h - Matcher registry -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// RegistryManager to manage registry of all known matchers. +// +// The registry provides a generic interface to construct any matcher by name. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_REGISTRYMANAGER_H +#define MLIR_TOOLS_MLIRQUERY_MATCHER_REGISTRYMANAGER_H + +#include "Diagnostics.h" +#include "mlir/Query/Matcher/Marshallers.h" +#include "mlir/Query/Matcher/Registry.h" +#include "mlir/Query/Matcher/VariantValue.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace mlir::query::matcher { + +using MatcherCtor = const internal::MatcherDescriptor *; + +struct MatcherCompletion { + MatcherCompletion() = default; + MatcherCompletion(llvm::StringRef typedText, llvm::StringRef matcherDecl) + : typedText(typedText.str()), matcherDecl(matcherDecl.str()) {} + + bool operator==(const MatcherCompletion &other) const { + return typedText == other.typedText && matcherDecl == other.matcherDecl; + } + + // The text to type to select this matcher. + std::string typedText; + + // The "declaration" of the matcher, with type information. 
+ std::string matcherDecl; +}; + +class RegistryManager { +public: + RegistryManager() = delete; + + static std::optional + lookupMatcherCtor(llvm::StringRef matcherName, + const Registry &matcherRegistry); + + static std::vector getAcceptedCompletionTypes( + llvm::ArrayRef> context); + + static std::vector + getMatcherCompletions(ArrayRef acceptedTypes, + const Registry &matcherRegistry); + + static VariantMatcher constructMatcher(MatcherCtor ctor, + internal::SourceRange nameRange, + ArrayRef args, + internal::Diagnostics *error); +}; + +} // namespace mlir::query::matcher + +#endif // MLIR_TOOLS_MLIRQUERY_MATCHER_REGISTRYMANAGER_H diff --git a/mlir/lib/Query/Matcher/VariantValue.cpp b/mlir/lib/Query/Matcher/VariantValue.cpp new file mode 100644 index 0000000000000..65bd4bd77bcf8 --- /dev/null +++ b/mlir/lib/Query/Matcher/VariantValue.cpp @@ -0,0 +1,132 @@ +//===--- Variantvalue.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "mlir/Query/Matcher/VariantValue.h" + +namespace mlir::query::matcher { + +VariantMatcher::Payload::~Payload() = default; + +class VariantMatcher::SinglePayload : public VariantMatcher::Payload { +public: + explicit SinglePayload(DynMatcher matcher) : matcher(std::move(matcher)) {} + + std::optional getDynMatcher() const override { return matcher; } + + std::string getTypeAsString() const override { return "Matcher"; } + +private: + DynMatcher matcher; +}; + +VariantMatcher::VariantMatcher() = default; + +VariantMatcher VariantMatcher::SingleMatcher(DynMatcher matcher) { + return VariantMatcher(std::make_shared(std::move(matcher))); +} + +std::optional VariantMatcher::getDynMatcher() const { + return value ? value->getDynMatcher() : std::nullopt; +} + +void VariantMatcher::reset() { value.reset(); } + +std::string VariantMatcher::getTypeAsString() const { return ""; } + +VariantValue::VariantValue(const VariantValue &other) + : type(ValueType::Nothing) { + *this = other; +} + +VariantValue::VariantValue(const llvm::StringRef string) + : type(ValueType::String) { + value.String = new llvm::StringRef(string); +} + +VariantValue::VariantValue(const VariantMatcher &matcher) + : type(ValueType::Matcher) { + value.Matcher = new VariantMatcher(matcher); +} + +VariantValue::~VariantValue() { reset(); } + +VariantValue &VariantValue::operator=(const VariantValue &other) { + if (this == &other) + return *this; + reset(); + switch (other.type) { + case ValueType::String: + setString(other.getString()); + break; + case ValueType::Matcher: + setMatcher(other.getMatcher()); + break; + case ValueType::Nothing: + type = ValueType::Nothing; + break; + } + return *this; +} + +void VariantValue::reset() { + switch (type) { + case 
ValueType::String: + delete value.String; + break; + case ValueType::Matcher: + delete value.Matcher; + break; + // Cases that do nothing. + case ValueType::Nothing: + break; + } + type = ValueType::Nothing; +} + +bool VariantValue::isString() const { return type == ValueType::String; } + +const llvm::StringRef &VariantValue::getString() const { + assert(isString()); + return *value.String; +} + +void VariantValue::setString(const llvm::StringRef &newValue) { + reset(); + type = ValueType::String; + value.String = new llvm::StringRef(newValue); +} + +bool VariantValue::isMatcher() const { return type == ValueType::Matcher; } + +const VariantMatcher &VariantValue::getMatcher() const { + assert(isMatcher()); + return *value.Matcher; +} + +void VariantValue::setMatcher(const VariantMatcher &newValue) { + reset(); + type = ValueType::Matcher; + value.Matcher = new VariantMatcher(newValue); +} + +std::string VariantValue::getTypeAsString() const { + switch (type) { + case ValueType::String: + return "String"; + case ValueType::Matcher: + return "Matcher"; + case ValueType::Nothing: + return "Nothing"; + } + llvm_unreachable("Invalid Type"); +} + +} // namespace mlir::query::matcher diff --git a/mlir/lib/Query/Query.cpp b/mlir/lib/Query/Query.cpp new file mode 100644 index 0000000000000..5c42e5a5f0a11 --- /dev/null +++ b/mlir/lib/Query/Query.cpp @@ -0,0 +1,82 @@ +//===---- Query.cpp - -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Query/Query.h" +#include "QueryParser.h" +#include "mlir/Query/Matcher/MatchFinder.h" +#include "mlir/Query/QuerySession.h" +#include "mlir/Support/LogicalResult.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" + +namespace mlir::query { + +QueryRef parse(llvm::StringRef line, const QuerySession &qs) { + return QueryParser::parse(line, qs); +} + +std::vector +complete(llvm::StringRef line, size_t pos, const QuerySession &qs) { + return QueryParser::complete(line, pos, qs); +} + +static void printMatch(llvm::raw_ostream &os, QuerySession &qs, Operation *op, + const std::string &binding) { + auto fileLoc = op->getLoc()->findInstanceOf(); + auto smloc = qs.getSourceManager().FindLocForLineAndColumn( + qs.getBufferId(), fileLoc.getLine(), fileLoc.getColumn()); + qs.getSourceManager().PrintMessage(os, smloc, llvm::SourceMgr::DK_Note, + "\"" + binding + "\" binds here"); +} + +Query::~Query() = default; + +mlir::LogicalResult InvalidQuery::run(llvm::raw_ostream &os, + QuerySession &qs) const { + os << errStr << "\n"; + return mlir::failure(); +} + +mlir::LogicalResult NoOpQuery::run(llvm::raw_ostream &os, + QuerySession &qs) const { + return mlir::success(); +} + +mlir::LogicalResult HelpQuery::run(llvm::raw_ostream &os, + QuerySession &qs) const { + os << "Available commands:\n\n" + " match MATCHER, m MATCHER " + "Match the mlir against the given matcher.\n" + " quit " + "Terminates the query session.\n\n"; + return mlir::success(); +} + +mlir::LogicalResult QuitQuery::run(llvm::raw_ostream &os, + QuerySession &qs) const { + qs.terminate = true; + return mlir::success(); +} + +mlir::LogicalResult MatchQuery::run(llvm::raw_ostream &os, + QuerySession &qs) const { + int matchCount = 0; + std::vector matches = + matcher::MatchFinder().getMatches(qs.getRootOp(), matcher); + os << 
"\n"; + for (Operation *op : matches) { + os << "Match #" << ++matchCount << ":\n\n"; + // Placeholder "root" binding for the initial draft. + printMatch(os, qs, op, "root"); + } + os << matchCount << (matchCount == 1 ? " match.\n\n" : " matches.\n\n"); + + return mlir::success(); +} + +} // namespace mlir::query diff --git a/mlir/lib/Query/QueryParser.cpp b/mlir/lib/Query/QueryParser.cpp new file mode 100644 index 0000000000000..f43a28569f007 --- /dev/null +++ b/mlir/lib/Query/QueryParser.cpp @@ -0,0 +1,217 @@ +//===---- QueryParser.cpp - mlir-query command parser ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "QueryParser.h" +#include "llvm/ADT/StringSwitch.h" + +namespace mlir::query { + +// Lex any amount of whitespace followed by a "word" (any sequence of +// non-whitespace characters) from the start of region [begin,end). If no word +// is found before end, return StringRef(). begin is adjusted to exclude the +// lexed region. +llvm::StringRef QueryParser::lexWord() { + line = line.drop_while([](char c) { + // Don't trim newlines. + return llvm::StringRef(" \t\v\f\r").contains(c); + }); + + if (line.empty()) + // Even though the line is empty, it contains a pointer and + // a (zero) length. The pointer is used in the LexOrCompleteWord + // code completion. + return line; + + llvm::StringRef word; + if (line.front() == '#') { + word = line.substr(0, 1); + } else { + word = line.take_until([](char c) { + // Don't trim newlines. + return llvm::StringRef(" \t\v\f\r").contains(c); + }); + } + + line = line.drop_front(word.size()); + return word; +} + +// This is the StringSwitch-alike used by LexOrCompleteWord below. See that +// function for details. 
+template +struct QueryParser::LexOrCompleteWord { + llvm::StringRef word; + llvm::StringSwitch stringSwitch; + + QueryParser *queryParser; + // Set to the completion point offset in word, or StringRef::npos if + // completion point not in word. + size_t wordCompletionPos; + + // Lexes a word and stores it in word. Returns a LexOrCompleteword object + // that can be used like a llvm::StringSwitch, but adds cases as possible + // completions if the lexed word contains the completion point. + LexOrCompleteWord(QueryParser *queryParser, llvm::StringRef &outWord) + : word(queryParser->lexWord()), stringSwitch(word), + queryParser(queryParser), wordCompletionPos(llvm::StringRef::npos) { + outWord = word; + if (queryParser->completionPos && + queryParser->completionPos <= word.data() + word.size()) { + if (queryParser->completionPos < word.data()) + wordCompletionPos = 0; + else + wordCompletionPos = queryParser->completionPos - word.data(); + } + } + + LexOrCompleteWord &Case(llvm::StringLiteral caseStr, const T &value, + bool isCompletion = true) { + + if (wordCompletionPos == llvm::StringRef::npos) + stringSwitch.Case(caseStr, value); + else if (!caseStr.empty() && isCompletion && + wordCompletionPos <= caseStr.size() && + caseStr.substr(0, wordCompletionPos) == + word.substr(0, wordCompletionPos)) { + + queryParser->completions.emplace_back( + (caseStr.substr(wordCompletionPos) + " ").str(), + std::string(caseStr)); + } + return *this; + } + + T Default(T value) { return stringSwitch.Default(value); } +}; + +QueryRef QueryParser::endQuery(QueryRef queryRef) { + llvm::StringRef extra = line; + llvm::StringRef extraTrimmed = extra.drop_while( + [](char c) { return llvm::StringRef(" \t\v\f\r").contains(c); }); + + if ((!extraTrimmed.empty() && extraTrimmed[0] == '\n') || + (extraTrimmed.size() >= 2 && extraTrimmed[0] == '\r' && + extraTrimmed[1] == '\n')) + queryRef->remainingContent = extra; + else { + llvm::StringRef trailingWord = lexWord(); + if 
(!trailingWord.empty() && trailingWord.front() == '#') { + line = line.drop_until([](char c) { return c == '\n'; }); + line = line.drop_while([](char c) { return c == '\n'; }); + return endQuery(queryRef); + } + if (!trailingWord.empty()) { + return new InvalidQuery("unexpected extra input: '" + extra + "'"); + } + } + return queryRef; +} + +namespace { + +enum class ParsedQueryKind { + Invalid, + Comment, + NoOp, + Help, + Match, + Quit, +}; + +QueryRef +makeInvalidQueryFromDiagnostics(const matcher::internal::Diagnostics &diag) { + std::string errStr; + llvm::raw_string_ostream os(errStr); + diag.print(os); + return new InvalidQuery(os.str()); +} +} // namespace + +QueryRef QueryParser::completeMatcherExpression() { + std::vector comps = + matcher::internal::Parser::completeExpression( + line, completionPos - line.begin(), qs.getRegistryData(), + &qs.namedValues); + for (const auto &comp : comps) { + completions.emplace_back(comp.typedText, comp.matcherDecl); + } + return QueryRef(); +} + +QueryRef QueryParser::doParse() { + + llvm::StringRef commandStr; + ParsedQueryKind qKind = + LexOrCompleteWord(this, commandStr) + .Case("", ParsedQueryKind::NoOp) + .Case("#", ParsedQueryKind::Comment, /*isCompletion=*/false) + .Case("help", ParsedQueryKind::Help) + .Case("m", ParsedQueryKind::Match, /*isCompletion=*/false) + .Case("match", ParsedQueryKind::Match) + .Case("q", ParsedQueryKind::Quit, /*IsCompletion=*/false) + .Case("quit", ParsedQueryKind::Quit) + .Default(ParsedQueryKind::Invalid); + + switch (qKind) { + case ParsedQueryKind::Comment: + case ParsedQueryKind::NoOp: + line = line.drop_until([](char c) { return c == '\n'; }); + line = line.drop_while([](char c) { return c == '\n'; }); + if (line.empty()) + return new NoOpQuery; + return doParse(); + + case ParsedQueryKind::Help: + return endQuery(new HelpQuery); + + case ParsedQueryKind::Quit: + return endQuery(new QuitQuery); + + case ParsedQueryKind::Match: { + if (completionPos) { + return 
completeMatcherExpression(); + } + + matcher::internal::Diagnostics diag; + auto matcherSource = line.ltrim(); + auto origMatcherSource = matcherSource; + std::optional matcher = + matcher::internal::Parser::parseMatcherExpression( + matcherSource, qs.getRegistryData(), &qs.namedValues, &diag); + if (!matcher) { + return makeInvalidQueryFromDiagnostics(diag); + } + auto actualSource = origMatcherSource.slice(0, origMatcherSource.size() - + matcherSource.size()); + QueryRef query = new MatchQuery(actualSource, *matcher); + query->remainingContent = matcherSource; + return query; + } + + case ParsedQueryKind::Invalid: + return new InvalidQuery("unknown command: " + commandStr); + } + + llvm_unreachable("Invalid query kind"); +} + +QueryRef QueryParser::parse(llvm::StringRef line, const QuerySession &qs) { + return QueryParser(line, qs).doParse(); +} + +std::vector +QueryParser::complete(llvm::StringRef line, size_t pos, + const QuerySession &qs) { + QueryParser queryParser(line, qs); + queryParser.completionPos = line.data() + pos; + + queryParser.doParse(); + return queryParser.completions; +} + +} // namespace mlir::query diff --git a/mlir/lib/Query/QueryParser.h b/mlir/lib/Query/QueryParser.h new file mode 100644 index 0000000000000..e9c30eccecab9 --- /dev/null +++ b/mlir/lib/Query/QueryParser.h @@ -0,0 +1,59 @@ +//===--- QueryParser.h - ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRQUERY_QUERYPARSER_H +#define MLIR_TOOLS_MLIRQUERY_QUERYPARSER_H + +#include "Matcher/Parser.h" +#include "mlir/Query/Query.h" +#include "mlir/Query/QuerySession.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/LineEditor/LineEditor.h" + +namespace mlir::query { + +class QuerySession; + +class QueryParser { +public: + // Parse line as a query and return a QueryRef representing the query, which + // may be an InvalidQuery. + static QueryRef parse(llvm::StringRef line, const QuerySession &qs); + + static std::vector + complete(llvm::StringRef line, size_t pos, const QuerySession &qs); + +private: + QueryParser(llvm::StringRef line, const QuerySession &qs) + : line(line), completionPos(nullptr), qs(qs) {} + + llvm::StringRef lexWord(); + + template + struct LexOrCompleteWord; + + QueryRef completeMatcherExpression(); + + QueryRef endQuery(QueryRef queryRef); + + // Parse [begin, end) and returns a reference to the parsed query object, + // which may be an InvalidQuery if a parse error occurs. 
+ QueryRef doParse(); + + llvm::StringRef line; + + const char *completionPos; + std::vector completions; + + const QuerySession &qs; +}; + +} // namespace mlir::query + +#endif // MLIR_TOOLS_MLIRQUERY_QUERYPARSER_H diff --git a/mlir/lib/Tools/CMakeLists.txt b/mlir/lib/Tools/CMakeLists.txt index 6175a1ce5f8d1..01270fa4b0fc3 100644 --- a/mlir/lib/Tools/CMakeLists.txt +++ b/mlir/lib/Tools/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(lsp-server-support) add_subdirectory(mlir-lsp-server) add_subdirectory(mlir-opt) add_subdirectory(mlir-pdll-lsp-server) +add_subdirectory(mlir-query) add_subdirectory(mlir-reduce) add_subdirectory(mlir-tblgen) add_subdirectory(mlir-translate) diff --git a/mlir/lib/Tools/mlir-query/CMakeLists.txt b/mlir/lib/Tools/mlir-query/CMakeLists.txt new file mode 100644 index 0000000000000..b81b02d42bfca --- /dev/null +++ b/mlir/lib/Tools/mlir-query/CMakeLists.txt @@ -0,0 +1,13 @@ +set(LLVM_LINK_COMPONENTS + lineeditor + ) + +add_mlir_library(MLIRQueryLib + MlirQueryMain.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Tools/mlir-query + + LINK_LIBS PUBLIC + MLIRQuery + ) diff --git a/mlir/lib/Tools/mlir-query/MlirQueryMain.cpp b/mlir/lib/Tools/mlir-query/MlirQueryMain.cpp new file mode 100644 index 0000000000000..15de16a8774bc --- /dev/null +++ b/mlir/lib/Tools/mlir-query/MlirQueryMain.cpp @@ -0,0 +1,115 @@ +//===- MlirQueryMain.cpp - MLIR Query main --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the general framework of the MLIR query tool. It +// parses the command line arguments, parses the MLIR file and outputs the query +// results. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Tools/mlir-query/MlirQueryMain.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Parser/Parser.h" +#include "mlir/Query/Query.h" +#include "mlir/Query/QuerySession.h" +#include "mlir/Support/FileUtilities.h" +#include "mlir/Support/LogicalResult.h" +#include "llvm/LineEditor/LineEditor.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/SourceMgr.h" + +//===----------------------------------------------------------------------===// +// Query Parser +//===----------------------------------------------------------------------===// + +mlir::LogicalResult +mlir::mlirQueryMain(int argc, char **argv, MLIRContext &context, + const mlir::query::matcher::Registry &matcherRegistry) { + + // Override the default '-h' and use the default PrintHelpMessage() which + // won't print options in categories. + static llvm::cl::opt help("h", llvm::cl::desc("Alias for -help"), + llvm::cl::Hidden); + + static llvm::cl::OptionCategory mlirQueryCategory("mlir-query options"); + + static llvm::cl::list commands( + "c", llvm::cl::desc("Specify command to run"), + llvm::cl::value_desc("command"), llvm::cl::cat(mlirQueryCategory)); + + static llvm::cl::opt inputFilename( + llvm::cl::Positional, llvm::cl::desc(""), + llvm::cl::cat(mlirQueryCategory)); + + static llvm::cl::opt noImplicitModule{ + "no-implicit-module", + llvm::cl::desc( + "Disable implicit addition of a top-level module op during parsing"), + llvm::cl::init(false)}; + + static llvm::cl::opt allowUnregisteredDialects( + "allow-unregistered-dialect", + llvm::cl::desc("Allow operation with no registered dialects"), + llvm::cl::init(false)); + + llvm::cl::HideUnrelatedOptions(mlirQueryCategory); + + llvm::InitLLVM y(argc, argv); + + llvm::cl::ParseCommandLineOptions(argc, argv, "MLIR test case query tool.\n"); + + if (help) { + llvm::cl::PrintHelpMessage(); + return 
mlir::success(); + } + + // Set up the input file. + std::string errorMessage; + auto file = openInputFile(inputFilename, &errorMessage); + if (!file) { + llvm::errs() << errorMessage << "\n"; + return mlir::failure(); + } + + auto sourceMgr = llvm::SourceMgr(); + auto bufferId = sourceMgr.AddNewSourceBuffer(std::move(file), SMLoc()); + + context.allowUnregisteredDialects(allowUnregisteredDialects); + + // Parse the input MLIR file. + OwningOpRef opRef = + noImplicitModule ? parseSourceFile(sourceMgr, &context) + : parseSourceFile(sourceMgr, &context); + if (!opRef) + return mlir::failure(); + + mlir::query::QuerySession qs(opRef.get(), sourceMgr, bufferId, + matcherRegistry); + if (!commands.empty()) { + for (auto &command : commands) { + mlir::query::QueryRef queryRef = mlir::query::parse(command, qs); + if (mlir::failed(queryRef->run(llvm::outs(), qs))) + return mlir::failure(); + } + } else { + llvm::LineEditor le("mlir-query"); + le.setListCompleter([&qs](llvm::StringRef line, size_t pos) { + return mlir::query::complete(line, pos, qs); + }); + while (std::optional line = le.readLine()) { + mlir::query::QueryRef queryRef = mlir::query::parse(*line, qs); + (void)queryRef->run(llvm::outs(), qs); + llvm::outs().flush(); + if (qs.terminate) + break; + } + } + + return mlir::success(); +} diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index bf143d036c2f6..6fc9ae0f3fc58 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -104,6 +104,7 @@ set(MLIR_TEST_DEPENDS mlir-pdll-lsp-server mlir-opt mlir-pdll + mlir-query mlir-reduce mlir-tblgen mlir-translate diff --git a/mlir/test/mlir-query/simple-test.mlir b/mlir/test/mlir-query/simple-test.mlir new file mode 100644 index 0000000000000..a4d006598767b --- /dev/null +++ b/mlir/test/mlir-query/simple-test.mlir @@ -0,0 +1,16 @@ +// RUN: mlir-query %s -c "m isConstantOp()" | FileCheck %s + +// CHECK: {{.*}}.mlir:5:13: note: "root" binds here +func.func @simple1() { + %c1_i32 = 
arith.constant 1 : i32 + return +} + +// CHECK: {{.*}}.mlir:12:11: note: "root" binds here +// CHECK: {{.*}}.mlir:13:11: note: "root" binds here +func.func @simple2() { + %cst1 = arith.constant 1.0 : f32 + %cst2 = arith.constant 2.0 : f32 + %add = arith.addf %cst1, %cst2 : f32 + return +} diff --git a/mlir/tools/CMakeLists.txt b/mlir/tools/CMakeLists.txt index e9a1e4d625172..a01f74f737e1b 100644 --- a/mlir/tools/CMakeLists.txt +++ b/mlir/tools/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(mlir-lsp-server) add_subdirectory(mlir-opt) add_subdirectory(mlir-parser-fuzzer) add_subdirectory(mlir-pdll-lsp-server) +add_subdirectory(mlir-query) add_subdirectory(mlir-reduce) add_subdirectory(mlir-shlib) add_subdirectory(mlir-spirv-cpu-runner) diff --git a/mlir/tools/mlir-query/CMakeLists.txt b/mlir/tools/mlir-query/CMakeLists.txt new file mode 100644 index 0000000000000..ef2e5a84b5569 --- /dev/null +++ b/mlir/tools/mlir-query/CMakeLists.txt @@ -0,0 +1,20 @@ +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) + +if(MLIR_INCLUDE_TESTS) + set(test_libs + MLIRTestDialect + ) +endif() + +add_mlir_tool(mlir-query + mlir-query.cpp + ) +llvm_update_compile_flags(mlir-query) +target_link_libraries(mlir-query + PRIVATE + ${dialect_libs} + ${test_libs} + MLIRQueryLib + ) + +mlir_check_link_libraries(mlir-query) diff --git a/mlir/tools/mlir-query/mlir-query.cpp b/mlir/tools/mlir-query/mlir-query.cpp new file mode 100644 index 0000000000000..0ed4f94d5802b --- /dev/null +++ b/mlir/tools/mlir-query/mlir-query.cpp @@ -0,0 +1,63 @@ +//===- mlir-query.cpp - MLIR Query Driver ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a command line utility that queries a file from/to MLIR using one +// of the registered queries. +// +//===----------------------------------------------------------------------===// + +#include "mlir/IR/Dialect.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Matchers.h" +#include "mlir/InitAllDialects.h" +#include "mlir/Query/Matcher/Registry.h" +#include "mlir/Tools/mlir-query/MlirQueryMain.h" + +using namespace mlir; + +// This is needed because these matchers are defined as overloaded functions. +using HasOpAttrName = detail::AttrOpMatcher(StringRef); +using HasOpName = detail::NameOpMatcher(StringRef); +using IsConstantOp = detail::constant_op_matcher(); + +namespace test { +#ifdef MLIR_INCLUDE_TESTS +void registerTestDialect(DialectRegistry &); +#endif +} // namespace test + +int main(int argc, char **argv) { + + DialectRegistry dialectRegistry; + registerAllDialects(dialectRegistry); + + query::matcher::Registry matcherRegistry; + + // Matchers registered in alphabetical order for consistency: + matcherRegistry.registerMatcher("hasOpAttrName", + static_cast(m_Attr)); + matcherRegistry.registerMatcher("hasOpName", static_cast(m_Op)); + matcherRegistry.registerMatcher("isConstantOp", + static_cast(m_Constant)); + matcherRegistry.registerMatcher("isNegInfFloat", m_NegInfFloat); + matcherRegistry.registerMatcher("isNegZeroFloat", m_NegZeroFloat); + matcherRegistry.registerMatcher("isNonZero", m_NonZero); + matcherRegistry.registerMatcher("isOne", m_One); + matcherRegistry.registerMatcher("isOneFloat", m_OneFloat); + matcherRegistry.registerMatcher("isPosInfFloat", m_PosInfFloat); + matcherRegistry.registerMatcher("isPosZeroFloat", m_PosZeroFloat); + matcherRegistry.registerMatcher("isZero", m_Zero); + matcherRegistry.registerMatcher("isZeroFloat", m_AnyZeroFloat); + +#ifdef MLIR_INCLUDE_TESTS + 
test::registerTestDialect(dialectRegistry); +#endif + MLIRContext context(dialectRegistry); + + return failed(mlirQueryMain(argc, argv, context, matcherRegistry)); +} From a669a237c45a515bea0d258cbbecdbbb3170d57a Mon Sep 17 00:00:00 2001 From: walter erquinigo Date: Fri, 13 Oct 2023 17:22:47 -0400 Subject: [PATCH 108/720] [LLDB] Fix buildbots https://lab.llvm.org/buildbot/#/builders/96/builds/46935 https://lab.llvm.org/buildbot/#/builders/68/builds/61651 are failing because of some namespace changes introduced by https://reviews.llvm.org/rG1673a1ba5dec --- lldb/unittests/Expression/DWARFExpressionTest.cpp | 1 + lldb/unittests/Symbol/SymtabTest.cpp | 1 + lldb/unittests/Symbol/TestLineEntry.cpp | 3 ++- .../unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp | 1 + lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp | 1 + lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp | 1 + lldb/unittests/SymbolFile/DWARF/DWARFUnitTest.cpp | 1 + lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp | 1 + lldb/unittests/SymbolFile/DWARF/XcodeSDKModuleTests.cpp | 1 + lldb/unittests/TestingSupport/Symbol/YAMLModuleTester.cpp | 1 + lldb/unittests/TestingSupport/Symbol/YAMLModuleTester.h | 6 +++--- 11 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp index b8b5b39422a4f..8d77d6b2585f1 100644 --- a/lldb/unittests/Expression/DWARFExpressionTest.cpp +++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp @@ -25,6 +25,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; static llvm::Expected Evaluate(llvm::ArrayRef expr, lldb::ModuleSP module_sp = {}, diff --git a/lldb/unittests/Symbol/SymtabTest.cpp b/lldb/unittests/Symbol/SymtabTest.cpp index e6b78eb5cd763..7b8892e5b5c0f 100644 --- a/lldb/unittests/Symbol/SymtabTest.cpp +++ b/lldb/unittests/Symbol/SymtabTest.cpp @@ -27,6 +27,7 @@ using namespace lldb; 
using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; class SymtabTest : public testing::Test { SubsystemRAII File = TestFile::fromYaml(yaml_data); diff --git a/lldb/unittests/TestingSupport/Symbol/YAMLModuleTester.h b/lldb/unittests/TestingSupport/Symbol/YAMLModuleTester.h index 3021ca7c9f8c7..37d9025cbcebf 100644 --- a/lldb/unittests/TestingSupport/Symbol/YAMLModuleTester.h +++ b/lldb/unittests/TestingSupport/Symbol/YAMLModuleTester.h @@ -26,16 +26,16 @@ namespace lldb_private { class YAMLModuleTester { protected: SubsystemRAII + plugin::dwarf::SymbolFileDWARF> subsystems; std::optional m_file; lldb::ModuleSP m_module_sp; - DWARFUnit *m_dwarf_unit; + plugin::dwarf::DWARFUnit *m_dwarf_unit; public: /// Parse the debug info sections from the YAML description. YAMLModuleTester(llvm::StringRef yaml_data, size_t cu_index = 0); - DWARFUnit *GetDwarfUnit() const { return m_dwarf_unit; } + plugin::dwarf::DWARFUnit *GetDwarfUnit() const { return m_dwarf_unit; } lldb::ModuleSP GetModule() const { return m_module_sp; } }; From 9d1a3fdd6278154fb4e7706419095ac7bfd72dcb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 13 Oct 2023 14:44:17 -0700 Subject: [PATCH 109/720] [RISCV][GISel] Add isel patterns for ADDIW/SRLIW/SRAIW/SLLIW and remove custom selection. (#68470) I had trouble getting patterns working previously because GISel was using an i32 immediate, but the instructions expected an i64 immediate because SelectionDAG doesn't have i32 as a legal type yet. After looking at other targets like AMDGPU, I discovered that I could use a SDNodeXForm and a cast to get the type checking in tablegen to allow me to do it. 
--- .../RISCV/GISel/RISCVInstructionSelector.cpp | 87 +++---------------- llvm/lib/Target/RISCV/RISCVGISel.td | 28 +++++- 2 files changed, 38 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 59c95f9c740b5..3a86dcbd86a0a 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -65,9 +65,6 @@ class RISCVInstructionSelector : public InstructionSelector { bool selectSelect(MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const; - bool earlySelectShift(unsigned Opc, MachineInstr &I, MachineIRBuilder &MIB, - const MachineRegisterInfo &MRI); - ComplexRendererFns selectShiftMask(MachineOperand &Root) const; ComplexRendererFns selectAddrRegImm(MachineOperand &Root) const; @@ -76,6 +73,8 @@ class RISCVInstructionSelector : public InstructionSelector { int OpIdx) const; void renderImmPlus1(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; + void renderImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; const RISCVSubtarget &STI; const RISCVInstrInfo &TII; @@ -131,30 +130,6 @@ RISCVInstructionSelector::selectAddrRegImm(MachineOperand &Root) const { [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }}}; } -// Tablegen doesn't allow us to write SRLIW/SRAIW/SLLIW patterns because the -// immediate Operand has type XLenVT. GlobalISel wants it to be i32. 
-bool RISCVInstructionSelector::earlySelectShift( - unsigned Opc, MachineInstr &I, MachineIRBuilder &MIB, - const MachineRegisterInfo &MRI) { - if (!Subtarget->is64Bit()) - return false; - - LLT Ty = MRI.getType(I.getOperand(0).getReg()); - if (!Ty.isScalar() || Ty.getSizeInBits() != 32) - return false; - - std::optional CstVal = - getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI); - if (!CstVal || !isUInt<5>(*CstVal)) - return false; - - auto NewI = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, - {I.getOperand(1).getReg()}) - .addImm(*CstVal); - I.eraseFromParent(); - return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); -} - bool RISCVInstructionSelector::select(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -199,55 +174,6 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) { return true; } - switch (Opc) { - case TargetOpcode::G_ADD: { - // Tablegen doesn't pick up the ADDIW pattern because i32 isn't a legal - // type for RV64 in SelectionDAG. Manually select it here. - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (Subtarget->is64Bit() && Ty.isScalar() && Ty.getSizeInBits() == 32) { - std::optional CstVal = - getIConstantVRegSExtVal(MI.getOperand(2).getReg(), MRI); - if (CstVal && isInt<12>(*CstVal)) { - auto NewI = MIB.buildInstr(RISCV::ADDIW, {MI.getOperand(0).getReg()}, - {MI.getOperand(1).getReg()}) - .addImm(*CstVal); - MI.eraseFromParent(); - return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); - } - } - break; - } - case TargetOpcode::G_SUB: { - // Tablegen doesn't pick up the ADDIW pattern because i32 isn't a legal - // type for RV64 in SelectionDAG. Manually select it here. 
- LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (Subtarget->is64Bit() && Ty.isScalar() && Ty.getSizeInBits() == 32) { - std::optional CstVal = - getIConstantVRegSExtVal(MI.getOperand(2).getReg(), MRI); - if (CstVal && ((isInt<12>(*CstVal) && *CstVal != -2048) || *CstVal == 2048)) { - auto NewI = MIB.buildInstr(RISCV::ADDIW, {MI.getOperand(0).getReg()}, - {MI.getOperand(1).getReg()}) - .addImm(-*CstVal); - MI.eraseFromParent(); - return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); - } - } - break; - } - case TargetOpcode::G_ASHR: - if (earlySelectShift(RISCV::SRAIW, MI, MIB, MRI)) - return true; - break; - case TargetOpcode::G_LSHR: - if (earlySelectShift(RISCV::SRLIW, MI, MIB, MRI)) - return true; - break; - case TargetOpcode::G_SHL: - if (earlySelectShift(RISCV::SLLIW, MI, MIB, MRI)) - return true; - break; - } - if (selectImpl(MI, *CoverageInfo)) return true; @@ -323,6 +249,15 @@ void RISCVInstructionSelector::renderImmPlus1(MachineInstrBuilder &MIB, MIB.addImm(CstVal + 1); } +void RISCVInstructionSelector::renderImm(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); + int64_t CstVal = MI.getOperand(1).getCImm()->getSExtValue(); + MIB.addImm(CstVal); +} + const TargetRegisterClass *RISCVInstructionSelector::getRegClassForTypeOnBank( LLT Ty, const RegisterBank &RB) const { if (RB.getID() == RISCV::GPRRegBankID) { diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index b20c27517b490..1e22ba8a930ed 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -18,6 +18,12 @@ include "RISCVCombine.td" def simm12Plus1 : ImmLeaf(Imm) && Imm != -2048) || Imm == 2048;}]>; +def simm12Plus1i32 : ImmLeaf(Imm) && Imm != -2048) || Imm == 2048;}]>; + +def simm12i32 : ImmLeaf(Imm);}]>; + +def uimm5i32 : ImmLeaf(Imm);}]>; // FIXME: This doesn't check that the G_CONSTANT we're 
deriving the immediate // from is only used once @@ -43,6 +49,14 @@ def GIAddrRegImm : GIComplexOperandMatcher, GIComplexPatternEquiv; +// Convert from i32 immediate to i64 target immediate to make SelectionDAG type +// checking happy so we can use ADDIW which expects an XLen immediate. +def as_i64imm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); +}]>; +def gi_as_i64imm : GICustomOperandRenderer<"renderImm">, + GISDNodeXFormEquiv; + // FIXME: This is labelled as handling 's32', however the ComplexPattern it // refers to handles both i32 and i64 based on the HwMode. Currently this LLT // parameter appears to be ignored so this pattern works for both, however we @@ -60,11 +74,23 @@ let Predicates = [IsRV64] in { def : Pat<(i32 (add GPR:$rs1, GPR:$rs2)), (ADDW GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (sub GPR:$rs1, GPR:$rs2)), (SUBW GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (add GPR:$rs1, simm12i32:$imm)), + (ADDIW GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)), + (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>; + def : Pat<(i32 (shl GPR:$rs1, (i32 GPR:$rs2))), (SLLW GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (sra GPR:$rs1, (i32 GPR:$rs2))), (SRAW GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (srl GPR:$rs1, (i32 GPR:$rs2))), (SRLW GPR:$rs1, GPR:$rs2)>; -def: Pat<(i64 (sext i32:$rs)), (ADDIW GPR:$rs, 0)>; +def : Pat<(i32 (shl GPR:$rs1, uimm5i32:$imm)), + (SLLIW GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (sra GPR:$rs1, uimm5i32:$imm)), + (SRAIW GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (srl GPR:$rs1, uimm5i32:$imm)), + (SRLIW GPR:$rs1, (i64 (as_i64imm $imm)))>; + +def : Pat<(i64 (sext i32:$rs)), (ADDIW GPR:$rs, 0)>; } let Predicates = [HasStdExtMOrZmmul, IsRV64] in { From 53c81a8c165dc3d71eea10ae63daf20e31fc8afa Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 14 Oct 2023 06:38:17 +0800 Subject: [PATCH 110/720] [RISCV][SDAG] Fix constant narrowing when narrowing loads (#69015) When narrowing logic ops(OR/XOR) 
with constant rhs, `DAGCombiner` will fixup the constant rhs node. It is incorrect when lhs is also a constant. For example, we will incorrectly replace `xor OpaqueConstant:i64<8191>, Constant:i64<-1>` with `xor (and OpaqueConstant:i64<8191>, Constant:i64<65535>), Constant:i64<-1>`. Fixes #68855. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 ++++++--- llvm/test/CodeGen/RISCV/pr68855.ll | 28 +++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/pr68855.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1021b07da1ac6..73438113651f5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6635,12 +6635,17 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { SDValue Op1 = LogicN->getOperand(1); if (isa(Op0)) - std::swap(Op0, Op1); + Op0 = + DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp); - SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), - Op1, MaskOp); + if (isa(Op1)) + Op1 = + DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp); - DAG.UpdateNodeOperands(LogicN, Op0, And); + if (isa(Op0) && !isa(Op1)) + std::swap(Op0, Op1); + + DAG.UpdateNodeOperands(LogicN, Op0, Op1); } // Create narrow loads. 
diff --git a/llvm/test/CodeGen/RISCV/pr68855.ll b/llvm/test/CodeGen/RISCV/pr68855.ll new file mode 100644 index 0000000000000..e9d1f6c2d1b2c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr68855.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s + +define i16 @narrow_load(ptr %p1, ptr %p2) { +; CHECK-LABEL: narrow_load: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lhu a2, 0(a0) +; CHECK-NEXT: lui a3, 2 +; CHECK-NEXT: addiw a3, a3, -1 +; CHECK-NEXT: xor a2, a2, a3 +; CHECK-NEXT: lui a4, 16 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: xor a4, a3, a4 +; CHECK-NEXT: or a2, a2, a4 +; CHECK-NEXT: sw a2, 0(a1) +; CHECK-NEXT: lhu a0, 0(a0) +; CHECK-NEXT: and a0, a0, a3 +; CHECK-NEXT: ret +entry: + %bf.load = load i16, ptr %p1, align 2 + %bf.clear = and i16 %bf.load, 8191 + %not = xor i16 %bf.clear, -1 + %conv1 = zext i16 %not to i32 + store i32 %conv1, ptr %p2, align 4 + %bf.load2 = load i16, ptr %p1, align 2 + %bf.clear3 = and i16 %bf.load2, 8191 + ret i16 %bf.clear3 +} From 9dbfd5828e3b23d3a752641e073ecfae04674f7f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 13 Oct 2023 16:13:41 -0700 Subject: [PATCH 111/720] [CodeLayout] CDSortImpl: remove two conditions that cannot trigger. NFC --- llvm/lib/Transforms/Utils/CodeLayout.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp index 620b52b69c31d..d9e302d8b4fa5 100644 --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -1139,13 +1139,6 @@ class CDSortImpl { // Extract the best (top) edge for merging. ChainEdge *BestEdge = *Queue.begin(); Queue.erase(Queue.begin()); - // Ignore self-edges. - if (BestEdge->isSelfEdge()) - continue; - // Ignore edges with non-positive gains. 
- if (BestEdge->gain() <= EPS) - continue; - ChainT *BestSrcChain = BestEdge->srcChain(); ChainT *BestDstChain = BestEdge->dstChain(); From 2f80dfc07978cc9bd48868ca1b6692f10f5bf24b Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Fri, 13 Oct 2023 14:49:37 -0700 Subject: [PATCH 112/720] [GlobalISel][NFC] Add distinct CHECK/SDAG/GISEL run lines to test. --- .../AArch64/arm64-indexed-vector-ldst.ll | 8181 ++++++++--------- 1 file changed, 4062 insertions(+), 4119 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 7d73e1c6c1d7f..1b9583464edea 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK -; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK-GISEL +; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefixes=CHECK,SDAG +; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-GISEL ; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for test_v8i8_pre_load ; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for test_v8i8_post_load @@ -629,12 +629,12 @@ @ptr = global ptr null define <8 x i8> @test_v8i8_pre_load(ptr %addr) { -; CHECK-LABEL: test_v8i8_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0, #40]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr d0, [x0, #40]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -650,12 +650,12 @@ define <8 x i8> @test_v8i8_pre_load(ptr %addr) { } define <8 x i8> @test_v8i8_post_load(ptr %addr) { -; CHECK-LABEL: test_v8i8_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0], #40 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr d0, [x0], #40 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_load: ; CHECK-GISEL: ; %bb.0: @@ -671,12 +671,12 @@ define <8 x i8> @test_v8i8_post_load(ptr %addr) { } define void @test_v8i8_pre_store(<8 x i8> %in, ptr %addr) { -; CHECK-LABEL: test_v8i8_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str d0, [x0, #40]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str d0, [x0, #40]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -692,12 +692,12 @@ define void @test_v8i8_pre_store(<8 x i8> %in, ptr %addr) { } define void @test_v8i8_post_store(<8 x i8> %in, ptr %addr) { -; CHECK-LABEL: test_v8i8_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str d0, [x0], #40 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str d0, [x0], #40 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_store: ; CHECK-GISEL: ; %bb.0: @@ -713,12 +713,12 @@ define void @test_v8i8_post_store(<8 x i8> %in, ptr %addr) { } define <4 x i16> @test_v4i16_pre_load(ptr %addr) { -; CHECK-LABEL: test_v4i16_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0, #40]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr d0, [x0, #40]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -734,12 +734,12 @@ define <4 x i16> @test_v4i16_pre_load(ptr %addr) { } define <4 x i16> @test_v4i16_post_load(ptr %addr) { -; CHECK-LABEL: test_v4i16_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0], #40 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr d0, [x0], #40 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_load: ; CHECK-GISEL: ; %bb.0: @@ -755,12 +755,12 @@ define <4 x i16> @test_v4i16_post_load(ptr %addr) { } define void @test_v4i16_pre_store(<4 x i16> %in, ptr %addr) { -; CHECK-LABEL: test_v4i16_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str d0, [x0, #40]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str d0, [x0, #40]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -776,12 +776,12 @@ define void @test_v4i16_pre_store(<4 x i16> %in, ptr %addr) { } define void @test_v4i16_post_store(<4 x i16> %in, ptr %addr) { -; CHECK-LABEL: test_v4i16_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str d0, [x0], #40 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str d0, [x0], #40 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_store: ; CHECK-GISEL: ; %bb.0: @@ -797,12 +797,12 @@ define void @test_v4i16_post_store(<4 x i16> %in, ptr %addr) { } define <2 x i32> @test_v2i32_pre_load(ptr %addr) { -; CHECK-LABEL: test_v2i32_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0, #40]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr d0, [x0, #40]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -818,12 +818,12 @@ define <2 x i32> @test_v2i32_pre_load(ptr %addr) { } define <2 x i32> @test_v2i32_post_load(ptr %addr) { -; CHECK-LABEL: test_v2i32_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0], #40 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr d0, [x0], #40 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_load: ; CHECK-GISEL: ; %bb.0: @@ -839,12 +839,12 @@ define <2 x i32> @test_v2i32_post_load(ptr %addr) { } define void @test_v2i32_pre_store(<2 x i32> %in, ptr %addr) { -; CHECK-LABEL: test_v2i32_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str d0, [x0, #40]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str d0, [x0, #40]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -860,12 +860,12 @@ define void @test_v2i32_pre_store(<2 x i32> %in, ptr %addr) { } define void @test_v2i32_post_store(<2 x i32> %in, ptr %addr) { -; CHECK-LABEL: test_v2i32_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str d0, [x0], #40 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str d0, [x0], #40 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_store: ; CHECK-GISEL: ; %bb.0: @@ -881,12 +881,12 @@ define void @test_v2i32_post_store(<2 x i32> %in, ptr %addr) { } define <2 x float> @test_v2f32_pre_load(ptr %addr) { -; CHECK-LABEL: test_v2f32_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0, #40]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr d0, [x0, #40]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -902,12 +902,12 @@ define <2 x float> @test_v2f32_pre_load(ptr %addr) { } define <2 x float> @test_v2f32_post_load(ptr %addr) { -; CHECK-LABEL: test_v2f32_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0], #40 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr d0, [x0], #40 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_load: ; CHECK-GISEL: ; %bb.0: @@ -923,12 +923,12 @@ define <2 x float> @test_v2f32_post_load(ptr %addr) { } define void @test_v2f32_pre_store(<2 x float> %in, ptr %addr) { -; CHECK-LABEL: test_v2f32_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str d0, [x0, #40]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str d0, [x0, #40]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -944,12 +944,12 @@ define void @test_v2f32_pre_store(<2 x float> %in, ptr %addr) { } define void @test_v2f32_post_store(<2 x float> %in, ptr %addr) { -; CHECK-LABEL: test_v2f32_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str d0, [x0], #40 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str d0, [x0], #40 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_store: ; CHECK-GISEL: ; %bb.0: @@ -965,12 +965,12 @@ define void @test_v2f32_post_store(<2 x float> %in, ptr %addr) { } define <1 x i64> @test_v1i64_pre_load(ptr %addr) { -; CHECK-LABEL: test_v1i64_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0, #40]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr d0, [x0, #40]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -986,12 +986,12 @@ define <1 x i64> @test_v1i64_pre_load(ptr %addr) { } define <1 x i64> @test_v1i64_post_load(ptr %addr) { -; CHECK-LABEL: test_v1i64_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0], #40 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr d0, [x0], #40 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_load: ; CHECK-GISEL: ; %bb.0: @@ -1007,12 +1007,12 @@ define <1 x i64> @test_v1i64_post_load(ptr %addr) { } define void @test_v1i64_pre_store(<1 x i64> %in, ptr %addr) { -; CHECK-LABEL: test_v1i64_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str d0, [x0, #40]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str d0, [x0, #40]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -1028,12 +1028,12 @@ define void @test_v1i64_pre_store(<1 x i64> %in, ptr %addr) { } define void @test_v1i64_post_store(<1 x i64> %in, ptr %addr) { -; CHECK-LABEL: test_v1i64_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str d0, [x0], #40 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str d0, [x0], #40 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_store: ; CHECK-GISEL: ; %bb.0: @@ -1049,12 +1049,12 @@ define void @test_v1i64_post_store(<1 x i64> %in, ptr %addr) { } define <16 x i8> @test_v16i8_pre_load(ptr %addr) { -; CHECK-LABEL: test_v16i8_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0, #80]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0, #80]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -1070,12 +1070,12 @@ define <16 x i8> @test_v16i8_pre_load(ptr %addr) { } define <16 x i8> @test_v16i8_post_load(ptr %addr) { -; CHECK-LABEL: test_v16i8_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0], #80 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0], #80 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_load: ; CHECK-GISEL: ; %bb.0: @@ -1091,12 +1091,12 @@ define <16 x i8> @test_v16i8_post_load(ptr %addr) { } define void @test_v16i8_pre_store(<16 x i8> %in, ptr %addr) { -; CHECK-LABEL: test_v16i8_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0, #80]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0, #80]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -1112,12 +1112,12 @@ define void @test_v16i8_pre_store(<16 x i8> %in, ptr %addr) { } define void @test_v16i8_post_store(<16 x i8> %in, ptr %addr) { -; CHECK-LABEL: test_v16i8_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0], #80 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0], #80 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_store: ; CHECK-GISEL: ; %bb.0: @@ -1133,12 +1133,12 @@ define void @test_v16i8_post_store(<16 x i8> %in, ptr %addr) { } define <8 x i16> @test_v8i16_pre_load(ptr %addr) { -; CHECK-LABEL: test_v8i16_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0, #80]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0, #80]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -1154,12 +1154,12 @@ define <8 x i16> @test_v8i16_pre_load(ptr %addr) { } define <8 x i16> @test_v8i16_post_load(ptr %addr) { -; CHECK-LABEL: test_v8i16_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0], #80 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0], #80 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_load: ; CHECK-GISEL: ; %bb.0: @@ -1175,12 +1175,12 @@ define <8 x i16> @test_v8i16_post_load(ptr %addr) { } define void @test_v8i16_pre_store(<8 x i16> %in, ptr %addr) { -; CHECK-LABEL: test_v8i16_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0, #80]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0, #80]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -1196,12 +1196,12 @@ define void @test_v8i16_pre_store(<8 x i16> %in, ptr %addr) { } define void @test_v8i16_post_store(<8 x i16> %in, ptr %addr) { -; CHECK-LABEL: test_v8i16_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0], #80 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0], #80 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_store: ; CHECK-GISEL: ; %bb.0: @@ -1217,12 +1217,12 @@ define void @test_v8i16_post_store(<8 x i16> %in, ptr %addr) { } define <4 x i32> @test_v4i32_pre_load(ptr %addr) { -; CHECK-LABEL: test_v4i32_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0, #80]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0, #80]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -1238,12 +1238,12 @@ define <4 x i32> @test_v4i32_pre_load(ptr %addr) { } define <4 x i32> @test_v4i32_post_load(ptr %addr) { -; CHECK-LABEL: test_v4i32_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0], #80 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0], #80 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_load: ; CHECK-GISEL: ; %bb.0: @@ -1259,12 +1259,12 @@ define <4 x i32> @test_v4i32_post_load(ptr %addr) { } define void @test_v4i32_pre_store(<4 x i32> %in, ptr %addr) { -; CHECK-LABEL: test_v4i32_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0, #80]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0, #80]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -1280,12 +1280,12 @@ define void @test_v4i32_pre_store(<4 x i32> %in, ptr %addr) { } define void @test_v4i32_post_store(<4 x i32> %in, ptr %addr) { -; CHECK-LABEL: test_v4i32_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0], #80 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0], #80 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_store: ; CHECK-GISEL: ; %bb.0: @@ -1302,12 +1302,12 @@ define void @test_v4i32_post_store(<4 x i32> %in, ptr %addr) { define <4 x float> @test_v4f32_pre_load(ptr %addr) { -; CHECK-LABEL: test_v4f32_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0, #80]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0, #80]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -1323,12 +1323,12 @@ define <4 x float> @test_v4f32_pre_load(ptr %addr) { } define <4 x float> @test_v4f32_post_load(ptr %addr) { -; CHECK-LABEL: test_v4f32_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0], #80 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0], #80 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_load: ; CHECK-GISEL: ; %bb.0: @@ -1344,12 +1344,12 @@ define <4 x float> @test_v4f32_post_load(ptr %addr) { } define void @test_v4f32_pre_store(<4 x float> %in, ptr %addr) { -; CHECK-LABEL: test_v4f32_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0, #80]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0, #80]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -1365,12 +1365,12 @@ define void @test_v4f32_pre_store(<4 x float> %in, ptr %addr) { } define void @test_v4f32_post_store(<4 x float> %in, ptr %addr) { -; CHECK-LABEL: test_v4f32_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0], #80 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0], #80 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_store: ; CHECK-GISEL: ; %bb.0: @@ -1387,12 +1387,12 @@ define void @test_v4f32_post_store(<4 x float> %in, ptr %addr) { define <2 x i64> @test_v2i64_pre_load(ptr %addr) { -; CHECK-LABEL: test_v2i64_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0, #80]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0, #80]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -1408,12 +1408,12 @@ define <2 x i64> @test_v2i64_pre_load(ptr %addr) { } define <2 x i64> @test_v2i64_post_load(ptr %addr) { -; CHECK-LABEL: test_v2i64_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0], #80 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0], #80 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_load: ; CHECK-GISEL: ; %bb.0: @@ -1429,12 +1429,12 @@ define <2 x i64> @test_v2i64_post_load(ptr %addr) { } define void @test_v2i64_pre_store(<2 x i64> %in, ptr %addr) { -; CHECK-LABEL: test_v2i64_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0, #80]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0, #80]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -1450,12 +1450,12 @@ define void @test_v2i64_pre_store(<2 x i64> %in, ptr %addr) { } define void @test_v2i64_post_store(<2 x i64> %in, ptr %addr) { -; CHECK-LABEL: test_v2i64_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0], #80 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0], #80 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_store: ; CHECK-GISEL: ; %bb.0: @@ -1472,12 +1472,12 @@ define void @test_v2i64_post_store(<2 x i64> %in, ptr %addr) { define <2 x double> @test_v2f64_pre_load(ptr %addr) { -; CHECK-LABEL: test_v2f64_pre_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0, #80]! -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_pre_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0, #80]! 
+; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_pre_load: ; CHECK-GISEL: ; %bb.0: @@ -1493,12 +1493,12 @@ define <2 x double> @test_v2f64_pre_load(ptr %addr) { } define <2 x double> @test_v2f64_post_load(ptr %addr) { -; CHECK-LABEL: test_v2f64_post_load: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr q0, [x0], #80 -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr q0, [x0], #80 +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_load: ; CHECK-GISEL: ; %bb.0: @@ -1514,12 +1514,12 @@ define <2 x double> @test_v2f64_post_load(ptr %addr) { } define void @test_v2f64_pre_store(<2 x double> %in, ptr %addr) { -; CHECK-LABEL: test_v2f64_pre_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0, #80]! -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_pre_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0, #80]! 
+; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_pre_store: ; CHECK-GISEL: ; %bb.0: @@ -1535,12 +1535,12 @@ define void @test_v2f64_pre_store(<2 x double> %in, ptr %addr) { } define void @test_v2f64_post_store(<2 x double> %in, ptr %addr) { -; CHECK-LABEL: test_v2f64_post_store: -; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, _ptr@PAGE -; CHECK-NEXT: str q0, [x0], #80 -; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_store: +; SDAG: ; %bb.0: +; SDAG-NEXT: adrp x8, _ptr@PAGE +; SDAG-NEXT: str q0, [x0], #80 +; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_store: ; CHECK-GISEL: ; %bb.0: @@ -1556,10 +1556,10 @@ define void @test_v2f64_post_store(<2 x double> %in, ptr %addr) { } define ptr @test_v16i8_post_imm_st1_lane(<16 x i8> %in, ptr %addr) { -; CHECK-LABEL: test_v16i8_post_imm_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: st1.b { v0 }[3], [x0], #1 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: st1.b { v0 }[3], [x0], #1 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1575,11 +1575,11 @@ define ptr @test_v16i8_post_imm_st1_lane(<16 x i8> %in, ptr %addr) { } define ptr @test_v16i8_post_reg_st1_lane(<16 x i8> %in, ptr %addr) { -; CHECK-LABEL: test_v16i8_post_reg_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #2 ; =0x2 -; CHECK-NEXT: st1.b { v0 }[3], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: mov w8, #2 ; =0x2 +; SDAG-NEXT: st1.b { v0 }[3], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1596,10 +1596,10 @@ define ptr @test_v16i8_post_reg_st1_lane(<16 x i8> %in, ptr %addr) { define ptr @test_v8i16_post_imm_st1_lane(<8 x i16> %in, ptr %addr) { -; CHECK-LABEL: test_v8i16_post_imm_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: 
st1.h { v0 }[3], [x0], #2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: st1.h { v0 }[3], [x0], #2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1615,11 +1615,11 @@ define ptr @test_v8i16_post_imm_st1_lane(<8 x i16> %in, ptr %addr) { } define ptr @test_v8i16_post_reg_st1_lane(<8 x i16> %in, ptr %addr) { -; CHECK-LABEL: test_v8i16_post_reg_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #4 ; =0x4 -; CHECK-NEXT: st1.h { v0 }[3], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: mov w8, #4 ; =0x4 +; SDAG-NEXT: st1.h { v0 }[3], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1635,10 +1635,10 @@ define ptr @test_v8i16_post_reg_st1_lane(<8 x i16> %in, ptr %addr) { } define ptr @test_v4i32_post_imm_st1_lane(<4 x i32> %in, ptr %addr) { -; CHECK-LABEL: test_v4i32_post_imm_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: st1.s { v0 }[3], [x0], #4 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: st1.s { v0 }[3], [x0], #4 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1654,11 +1654,11 @@ define ptr @test_v4i32_post_imm_st1_lane(<4 x i32> %in, ptr %addr) { } define ptr @test_v4i32_post_reg_st1_lane(<4 x i32> %in, ptr %addr) { -; CHECK-LABEL: test_v4i32_post_reg_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #8 ; =0x8 -; CHECK-NEXT: st1.s { v0 }[3], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: mov w8, #8 ; =0x8 +; SDAG-NEXT: st1.s { v0 }[3], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1674,10 +1674,10 @@ define ptr @test_v4i32_post_reg_st1_lane(<4 x i32> %in, ptr %addr) { } define ptr @test_v4f32_post_imm_st1_lane(<4 x float> %in, ptr %addr) { 
-; CHECK-LABEL: test_v4f32_post_imm_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: st1.s { v0 }[3], [x0], #4 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: st1.s { v0 }[3], [x0], #4 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1693,11 +1693,11 @@ define ptr @test_v4f32_post_imm_st1_lane(<4 x float> %in, ptr %addr) { } define ptr @test_v4f32_post_reg_st1_lane(<4 x float> %in, ptr %addr) { -; CHECK-LABEL: test_v4f32_post_reg_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #8 ; =0x8 -; CHECK-NEXT: st1.s { v0 }[3], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: mov w8, #8 ; =0x8 +; SDAG-NEXT: st1.s { v0 }[3], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1713,10 +1713,10 @@ define ptr @test_v4f32_post_reg_st1_lane(<4 x float> %in, ptr %addr) { } define ptr @test_v2i64_post_imm_st1_lane(<2 x i64> %in, ptr %addr) { -; CHECK-LABEL: test_v2i64_post_imm_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: st1.d { v0 }[1], [x0], #8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: st1.d { v0 }[1], [x0], #8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1732,11 +1732,11 @@ define ptr @test_v2i64_post_imm_st1_lane(<2 x i64> %in, ptr %addr) { } define ptr @test_v2i64_post_reg_st1_lane(<2 x i64> %in, ptr %addr) { -; CHECK-LABEL: test_v2i64_post_reg_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #16 ; =0x10 -; CHECK-NEXT: st1.d { v0 }[1], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: mov w8, #16 ; =0x10 +; SDAG-NEXT: st1.d { v0 }[1], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1752,10 +1752,10 @@ define ptr @test_v2i64_post_reg_st1_lane(<2 x i64> 
%in, ptr %addr) { } define ptr @test_v2f64_post_imm_st1_lane(<2 x double> %in, ptr %addr) { -; CHECK-LABEL: test_v2f64_post_imm_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: st1.d { v0 }[1], [x0], #8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: st1.d { v0 }[1], [x0], #8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1771,11 +1771,11 @@ define ptr @test_v2f64_post_imm_st1_lane(<2 x double> %in, ptr %addr) { } define ptr @test_v2f64_post_reg_st1_lane(<2 x double> %in, ptr %addr) { -; CHECK-LABEL: test_v2f64_post_reg_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #16 ; =0x10 -; CHECK-NEXT: st1.d { v0 }[1], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: mov w8, #16 ; =0x10 +; SDAG-NEXT: st1.d { v0 }[1], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1791,11 +1791,11 @@ define ptr @test_v2f64_post_reg_st1_lane(<2 x double> %in, ptr %addr) { } define ptr @test_v8i8_post_imm_st1_lane(<8 x i8> %in, ptr %addr) { -; CHECK-LABEL: test_v8i8_post_imm_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: st1.b { v0 }[3], [x0], #1 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: st1.b { v0 }[3], [x0], #1 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1812,12 +1812,12 @@ define ptr @test_v8i8_post_imm_st1_lane(<8 x i8> %in, ptr %addr) { } define ptr @test_v8i8_post_reg_st1_lane(<8 x i8> %in, ptr %addr) { -; CHECK-LABEL: test_v8i8_post_reg_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #2 ; =0x2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: st1.b { v0 }[3], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_st1_lane: +; SDAG: ; %bb.0: +; 
SDAG-NEXT: mov w8, #2 ; =0x2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: st1.b { v0 }[3], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1834,11 +1834,11 @@ define ptr @test_v8i8_post_reg_st1_lane(<8 x i8> %in, ptr %addr) { } define ptr @test_v4i16_post_imm_st1_lane(<4 x i16> %in, ptr %addr) { -; CHECK-LABEL: test_v4i16_post_imm_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: st1.h { v0 }[3], [x0], #2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: st1.h { v0 }[3], [x0], #2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1855,12 +1855,12 @@ define ptr @test_v4i16_post_imm_st1_lane(<4 x i16> %in, ptr %addr) { } define ptr @test_v4i16_post_reg_st1_lane(<4 x i16> %in, ptr %addr) { -; CHECK-LABEL: test_v4i16_post_reg_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #4 ; =0x4 -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: st1.h { v0 }[3], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: mov w8, #4 ; =0x4 +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: st1.h { v0 }[3], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1877,11 +1877,11 @@ define ptr @test_v4i16_post_reg_st1_lane(<4 x i16> %in, ptr %addr) { } define ptr @test_v2i32_post_imm_st1_lane(<2 x i32> %in, ptr %addr) { -; CHECK-LABEL: test_v2i32_post_imm_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: st1.s { v0 }[1], [x0], #4 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: st1.s { v0 }[1], [x0], #4 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: 
test_v2i32_post_imm_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1898,12 +1898,12 @@ define ptr @test_v2i32_post_imm_st1_lane(<2 x i32> %in, ptr %addr) { } define ptr @test_v2i32_post_reg_st1_lane(<2 x i32> %in, ptr %addr) { -; CHECK-LABEL: test_v2i32_post_reg_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #8 ; =0x8 -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: st1.s { v0 }[1], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: mov w8, #8 ; =0x8 +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: st1.s { v0 }[1], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1920,11 +1920,11 @@ define ptr @test_v2i32_post_reg_st1_lane(<2 x i32> %in, ptr %addr) { } define ptr @test_v2f32_post_imm_st1_lane(<2 x float> %in, ptr %addr) { -; CHECK-LABEL: test_v2f32_post_imm_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: st1.s { v0 }[1], [x0], #4 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: st1.s { v0 }[1], [x0], #4 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1941,12 +1941,12 @@ define ptr @test_v2f32_post_imm_st1_lane(<2 x float> %in, ptr %addr) { } define ptr @test_v2f32_post_reg_st1_lane(<2 x float> %in, ptr %addr) { -; CHECK-LABEL: test_v2f32_post_reg_st1_lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #8 ; =0x8 -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: st1.s { v0 }[1], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_st1_lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: mov w8, #8 ; =0x8 +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: st1.s { v0 }[1], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_st1_lane: ; CHECK-GISEL: ; %bb.0: @@ -1963,11 +1963,11 @@ define ptr 
@test_v2f32_post_reg_st1_lane(<2 x float> %in, ptr %addr) { } define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v16i8_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.16b { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.16b { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -1982,11 +1982,11 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(ptr %A, ptr %ptr) { } define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v16i8_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.16b { v0, v1 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.16b { v0, v1 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2004,11 +2004,11 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr) define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i8_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.8b { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.8b { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2023,11 +2023,11 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(ptr %A, ptr %ptr) { } define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i8_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.8b { v0, v1 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld2: +; SDAG: ; %bb.0: +; 
SDAG-NEXT: ld2.8b { v0, v1 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2045,11 +2045,11 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr) define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i16_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.8h { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.8h { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2064,12 +2064,12 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(ptr %A, ptr %ptr) { } define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i16_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld2.8h { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld2.8h { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2087,11 +2087,11 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr) define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i16_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.4h { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.4h { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2106,12 +2106,12 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(ptr %A, ptr %ptr) { } define { <4 x i16>, <4 x i16> } 
@test_v4i16_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i16_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld2.4h { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld2.4h { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2129,11 +2129,11 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr) define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i32_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.4s { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.4s { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2148,12 +2148,12 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(ptr %A, ptr %ptr) { } define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i32_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld2.4s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld2.4s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2171,11 +2171,11 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr) define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i32_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.2s { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld2: 
+; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.2s { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2190,12 +2190,12 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(ptr %A, ptr %ptr) { } define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i32_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld2.2s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld2.2s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2213,11 +2213,11 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr) define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i64_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.2d { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2232,12 +2232,12 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(ptr %A, ptr %ptr) { } define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i64_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld2.2d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2255,11 +2255,11 @@ declare { <2 x i64>, <2 x 
i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr) define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1i64_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2274,12 +2274,12 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(ptr %A, ptr %ptr) { } define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1i64_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2297,11 +2297,11 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr) define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4f32_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.4s { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.4s { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2316,12 +2316,12 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(ptr %A, ptr %ptr) { } define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4f32_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld2.4s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str 
x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld2.4s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2339,11 +2339,11 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr) define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2f32_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.2s { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.2s { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2358,12 +2358,12 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(ptr %A, ptr %ptr) { } define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f32_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld2.2s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld2.2s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2381,11 +2381,11 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr) define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2f64_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2.2d { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ 
-2400,12 +2400,12 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(ptr %A, ptr %ptr) } define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f64_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld2.2d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2423,11 +2423,11 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr) define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1f64_post_imm_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2442,12 +2442,12 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(ptr %A, ptr %ptr) } define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1f64_post_reg_ld2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld2: ; CHECK-GISEL: ; %bb.0: @@ -2465,11 +2465,11 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr) define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(ptr %A, ptr %ptr) { -; 
CHECK-LABEL: test_v16i8_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.16b { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.16b { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2484,11 +2484,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(ptr %A, ptr } define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v16i8_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.16b { v0, v1, v2 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.16b { v0, v1, v2 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2506,11 +2506,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr) define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i8_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.8b { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.8b { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2525,11 +2525,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(ptr %A, ptr %ptr } define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i8_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.8b { v0, v1, v2 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.8b { v0, v1, v2 }, [x0], x2 +; SDAG-NEXT: str x0, 
[x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2547,11 +2547,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr) define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i16_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.8h { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.8h { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2566,12 +2566,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(ptr %A, ptr } define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i16_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld3.8h { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld3.8h { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2589,11 +2589,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr) define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i16_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.4h { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.4h { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2608,12 +2608,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(ptr %A, ptr } define { <4 x i16>, <4 x 
i16>, <4 x i16> } @test_v4i16_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i16_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld3.4h { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld3.4h { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2631,11 +2631,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr) define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i32_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.4s { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.4s { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2650,12 +2650,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(ptr %A, ptr } define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i32_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld3.4s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld3.4s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2673,11 +2673,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr) define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i32_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.2s { v0, v1, 
v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.2s { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2692,12 +2692,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(ptr %A, ptr } define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i32_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld3.2s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld3.2s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2715,11 +2715,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr) define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i64_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.2d { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.2d { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2734,12 +2734,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(ptr %A, ptr } define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i64_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld3.2d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld3.2d { v0, v1, 
v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2757,11 +2757,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr) define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1i64_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2776,12 +2776,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(ptr %A, ptr } define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1i64_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2799,11 +2799,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr) define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4f32_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.4s { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.4s { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2818,12 +2818,12 @@ define { <4 x float>, <4 x float>, <4 x float> } 
@test_v4f32_post_imm_ld3(ptr %A } define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4f32_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld3.4s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld3.4s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2841,11 +2841,11 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2f32_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.2s { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.2s { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2860,12 +2860,12 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(ptr %A } define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f32_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld3.2s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld3.2s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2883,11 +2883,11 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(ptr %A, ptr %ptr) { -; 
CHECK-LABEL: test_v2f64_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3.2d { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3.2d { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2902,12 +2902,12 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(ptr } define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f64_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld3.2d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld3.2d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2925,11 +2925,11 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f6 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1f64_post_imm_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2944,12 +2944,12 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(ptr } define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1f64_post_reg_ld3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret 
+; SDAG-LABEL: test_v1f64_post_reg_ld3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld3: ; CHECK-GISEL: ; %bb.0: @@ -2967,11 +2967,11 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f6 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v16i8_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.16b { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.16b { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -2986,11 +2986,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(p } define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v16i8_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.16b { v0, v1, v2, v3 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.16b { v0, v1, v2, v3 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3008,11 +3008,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v1 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i8_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.8b { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.8b { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld4: 
; CHECK-GISEL: ; %bb.0: @@ -3027,11 +3027,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(ptr %A } define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i8_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.8b { v0, v1, v2, v3 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.8b { v0, v1, v2, v3 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3049,11 +3049,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i16_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.8h { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.8h { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3068,12 +3068,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(p } define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i16_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld4.8h { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld4.8h { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3091,11 +3091,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8 define { <4 x i16>, <4 x i16>, 
<4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i16_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.4h { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.4h { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3110,12 +3110,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(p } define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i16_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld4.4h { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld4.4h { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3133,11 +3133,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i32_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3152,12 +3152,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(p } define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i32_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, 
x2, #2 -; CHECK-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3175,11 +3175,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i32_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3194,12 +3194,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(p } define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i32_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3217,11 +3217,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i64_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: 
test_v2i64_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3236,12 +3236,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(p } define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i64_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3259,11 +3259,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1i64_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3278,12 +3278,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(p } define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1i64_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 
+; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3301,11 +3301,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4f32_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3320,12 +3320,12 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i } define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4f32_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3343,11 +3343,11 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2f32_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3362,12 +3362,12 
@@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i } define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f32_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3385,11 +3385,11 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2f64_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3404,12 +3404,12 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po } define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f64_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3427,11 +3427,11 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x 
double> } @llvm.aarch64 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1f64_post_imm_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3446,12 +3446,12 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po } define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1f64_post_reg_ld4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld4: ; CHECK-GISEL: ; %bb.0: @@ -3468,11 +3468,11 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr) define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v16i8_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.16b { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.16b { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3487,11 +3487,11 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(ptr %A, ptr %ptr) { } define { <16 x i8>, <16 x i8> } 
@test_v16i8_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v16i8_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.16b { v0, v1 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.16b { v0, v1 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3509,11 +3509,11 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr) define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i8_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.8b { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.8b { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3528,11 +3528,11 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(ptr %A, ptr %ptr) { } define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i8_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.8b { v0, v1 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.8b { v0, v1 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3550,11 +3550,11 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr) define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i16_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.8h { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.8h { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; 
SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3569,12 +3569,12 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(ptr %A, ptr %ptr) { } define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i16_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld1.8h { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld1.8h { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3592,11 +3592,11 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr) define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i16_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.4h { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.4h { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3611,12 +3611,12 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(ptr %A, ptr %ptr) { } define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i16_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld1.4h { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld1.4h { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3634,11 +3634,11 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr) define { <4 
x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i32_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.4s { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.4s { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3653,12 +3653,12 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(ptr %A, ptr %ptr) { } define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i32_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.4s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.4s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3676,11 +3676,11 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr) define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i32_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2s { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2s { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3695,12 +3695,12 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(ptr %A, ptr %ptr) { } define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i32_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.2s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; 
SDAG-LABEL: test_v2i32_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.2s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3718,11 +3718,11 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr) define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i64_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2d { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2d { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3737,12 +3737,12 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(ptr %A, ptr %ptr) { } define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i64_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.2d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.2d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3760,11 +3760,11 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr) define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1i64_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3779,12 +3779,12 @@ 
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(ptr %A, ptr %ptr) { } define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1i64_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3802,11 +3802,11 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr) define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4f32_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.4s { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.4s { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3821,12 +3821,12 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(ptr %A, ptr %ptr) } define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4f32_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.4s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.4s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3844,11 +3844,11 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr) define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: 
test_v2f32_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2s { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2s { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3863,12 +3863,12 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(ptr %A, ptr %ptr) } define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f32_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.2s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.2s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3886,11 +3886,11 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr) define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2f64_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2d { v0, v1 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2d { v0, v1 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3905,12 +3905,12 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(ptr %A, ptr %pt } define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f64_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.2d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld1x2: +; SDAG: ; %bb.0: +; 
SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.2d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3928,11 +3928,11 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr) define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1f64_post_imm_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3947,12 +3947,12 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(ptr %A, ptr %pt } define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1f64_post_reg_ld1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld1x2: ; CHECK-GISEL: ; %bb.0: @@ -3970,11 +3970,11 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr) define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v16i8_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.16b { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.16b { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -3989,11 +3989,11 @@ define { <16 x 
i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(ptr %A, pt } define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v16i8_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.16b { v0, v1, v2 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.16b { v0, v1, v2 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4011,11 +4011,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(pt define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i8_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.8b { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.8b { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4030,11 +4030,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(ptr %A, ptr %p } define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i8_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.8b { v0, v1, v2 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.8b { v0, v1, v2 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4052,11 +4052,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr) define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i16_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.8h { v0, v1, v2 }, 
[x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.8h { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4071,12 +4071,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(ptr %A, pt } define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i16_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld1.8h { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld1.8h { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4094,11 +4094,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0(pt define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i16_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.4h { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.4h { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4113,12 +4113,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(ptr %A, pt } define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i16_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld1.4h { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; 
SDAG-NEXT: ld1.4h { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4136,11 +4136,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0(pt define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i32_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.4s { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4155,12 +4155,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(ptr %A, pt } define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i32_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.4s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4178,11 +4178,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0(pt define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i32_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2s { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4197,12 +4197,12 @@ define { 
<2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(ptr %A, pt } define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i32_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.2s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4220,11 +4220,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0(pt define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i64_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2d { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4239,12 +4239,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(ptr %A, pt } define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i64_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.2d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4262,11 +4262,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(pt define { <1 x i64>, <1 x i64>, <1 x i64> } 
@test_v1i64_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1i64_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4281,12 +4281,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(ptr %A, pt } define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1i64_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4304,11 +4304,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(pt define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4f32_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.4s { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4323,12 +4323,12 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(ptr } define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4f32_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.4s { v0, v1, v2 
}, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.4s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4346,11 +4346,11 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2f32_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2s { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4365,12 +4365,12 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(ptr } define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f32_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.2s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4388,11 +4388,11 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2f64_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2d { v0, 
v1, v2 }, [x0], #48 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4407,12 +4407,12 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(p } define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f64_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.2d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4430,11 +4430,11 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1f64_post_imm_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4449,12 +4449,12 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(p } define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1f64_post_reg_ld1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: 
test_v1f64_post_reg_ld1x3: ; CHECK-GISEL: ; %bb.0: @@ -4472,11 +4472,11 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v16i8_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.16b { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.16b { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4491,11 +4491,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4 } define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v16i8_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.16b { v0, v1, v2, v3 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.16b { v0, v1, v2, v3 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4513,11 +4513,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4. 
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i8_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.8b { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.8b { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4532,11 +4532,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(ptr } define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i8_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.8b { v0, v1, v2, v3 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.8b { v0, v1, v2, v3 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4554,11 +4554,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v8i16_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.8h { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.8h { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4573,12 +4573,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4 } define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i16_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; 
CHECK-NEXT: ld1.8h { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld1.8h { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4596,11 +4596,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4. define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i16_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.4h { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.4h { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4615,12 +4615,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4 } define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i16_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld1.4h { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld1.4h { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4638,11 +4638,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4. 
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4i32_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4657,12 +4657,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4 } define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i32_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4680,11 +4680,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4. 
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i32_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4699,12 +4699,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4 } define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i32_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4722,11 +4722,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4. 
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2i64_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4741,12 +4741,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4 } define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i64_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4764,11 +4764,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4. 
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v1i64_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4783,12 +4783,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4 } define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1i64_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4806,11 +4806,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4. 
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v4f32_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4825,12 +4825,12 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i } define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4f32_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4848,11 +4848,11 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2f32_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4867,12 +4867,12 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i } define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 
%inc) { -; CHECK-LABEL: test_v2f32_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4890,11 +4890,11 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: test_v2f64_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4909,12 +4909,12 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po } define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f64_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4932,11 +4932,11 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(ptr %A, ptr %ptr) { -; CHECK-LABEL: 
test_v1f64_post_imm_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4951,12 +4951,12 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po } define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v1f64_post_reg_ld1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld1x4: ; CHECK-GISEL: ; %bb.0: @@ -4974,11 +4974,11 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.16b { v0, v1 }, [x0], #2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.16b { v0, v1 }, [x0], #2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -4993,11 +4993,11 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(ptr %A, ptr %ptr) noun } define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.16b { v0, v1 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: 
test_v16i8_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.16b { v0, v1 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5015,11 +5015,11 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr) nounwind define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.8b { v0, v1 }, [x0], #2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.8b { v0, v1 }, [x0], #2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5034,11 +5034,11 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(ptr %A, ptr %ptr) nounwin } define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.8b { v0, v1 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.8b { v0, v1 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5056,11 +5056,11 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr) nounwind rea define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.8h { v0, v1 }, [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.8h { v0, v1 }, [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5075,12 +5075,12 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(ptr %A, ptr %ptr) 
noun } define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld2r.8h { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld2r.8h { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5098,11 +5098,11 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr) nounwind define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.4h { v0, v1 }, [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.4h { v0, v1 }, [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5117,12 +5117,12 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(ptr %A, ptr %ptr) noun } define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld2r.4h { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld2r.4h { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5140,11 +5140,11 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr) nounwind define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: 
ld2r.4s { v0, v1 }, [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.4s { v0, v1 }, [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5159,12 +5159,12 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(ptr %A, ptr %ptr) noun } define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld2r.4s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld2r.4s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5181,11 +5181,11 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(ptr %A, ptr %ptr, i64 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr) nounwind readonly define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.2s { v0, v1 }, [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.2s { v0, v1 }, [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5200,12 +5200,12 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(ptr %A, ptr %ptr) noun } define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld2r.2s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: 
test_v2i32_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld2r.2s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5223,11 +5223,11 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr) nounwind define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.2d { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5242,12 +5242,12 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(ptr %A, ptr %ptr) noun } define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld2r.2d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5264,11 +5264,11 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(ptr %A, ptr %ptr, i64 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr) nounwind readonly define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.1d { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.1d { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; 
SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5283,12 +5283,12 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(ptr %A, ptr %ptr) noun } define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld2r.1d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld2r.1d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5306,11 +5306,11 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr) nounwind define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.4s { v0, v1 }, [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.4s { v0, v1 }, [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5325,12 +5325,12 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(ptr %A, ptr %ptr) } define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld2r.4s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld2r.4s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5347,11 +5347,11 @@ define { <4 x float>, <4 x float> } 
@test_v4f32_post_reg_ld2r(ptr %A, ptr %ptr, declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0(ptr) nounwind readonly define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.2s { v0, v1 }, [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.2s { v0, v1 }, [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5366,12 +5366,12 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(ptr %A, ptr %ptr) } define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld2r.2s { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld2r.2s { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5389,11 +5389,11 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0(ptr) nounw define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.2d { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5408,12 +5408,12 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(ptr %A, ptr %ptr } define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(ptr %A, ptr %ptr, 
i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld2r.2d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5430,11 +5430,11 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(ptr %A, ptr %ptr declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0(ptr) nounwind readonly define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld2r.1d { v0, v1 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld2r.1d { v0, v1 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5449,12 +5449,12 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(ptr %A, ptr %ptr } define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_ld2r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld2r.1d { v0, v1 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld2r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld2r.1d { v0, v1 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld2r: ; CHECK-GISEL: ; %bb.0: @@ -5472,11 +5472,11 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0(ptr) nou define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: 
test_v16i8_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.16b { v0, v1, v2 }, [x0], #3 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.16b { v0, v1, v2 }, [x0], #3 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5491,11 +5491,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(ptr %A, ptr } define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.16b { v0, v1, v2 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.16b { v0, v1, v2 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5513,11 +5513,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.8b { v0, v1, v2 }, [x0], #3 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.8b { v0, v1, v2 }, [x0], #3 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5532,11 +5532,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(ptr %A, ptr %pt } define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.8b { v0, v1, v2 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.8b { v0, v1, v2 
}, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5554,11 +5554,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr) no define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.8h { v0, v1, v2 }, [x0], #6 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.8h { v0, v1, v2 }, [x0], #6 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5573,12 +5573,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(ptr %A, ptr } define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld3r.8h { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld3r.8h { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5596,11 +5596,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.4h { v0, v1, v2 }, [x0], #6 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.4h { v0, v1, v2 }, [x0], #6 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5615,12 +5615,12 @@ define { <4 x i16>, <4 x 
i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(ptr %A, ptr } define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld3r.4h { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld3r.4h { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5638,11 +5638,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.4s { v0, v1, v2 }, [x0], #12 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.4s { v0, v1, v2 }, [x0], #12 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5657,12 +5657,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(ptr %A, ptr } define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld3r.4s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld3r.4s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5679,11 +5679,11 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(ptr %A, ptr declare { <4 x i32>, <4 x i32>, <4 x i32> } 
@llvm.aarch64.neon.ld3r.v4i32.p0(ptr) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.2s { v0, v1, v2 }, [x0], #12 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.2s { v0, v1, v2 }, [x0], #12 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5698,12 +5698,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(ptr %A, ptr } define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld3r.2s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld3r.2s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5721,11 +5721,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.2d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.2d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5740,12 +5740,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(ptr %A, ptr } define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; 
CHECK-LABEL: test_v2i64_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld3r.2d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld3r.2d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5762,11 +5762,11 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(ptr %A, ptr declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.1d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.1d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5781,12 +5781,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(ptr %A, ptr } define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld3r.1d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld3r.1d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5804,11 +5804,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: 
test_v4f32_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.4s { v0, v1, v2 }, [x0], #12 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.4s { v0, v1, v2 }, [x0], #12 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5823,12 +5823,12 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(ptr % } define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld3r.4s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld3r.4s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5845,11 +5845,11 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(ptr % declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0(ptr) nounwind readonly define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.2s { v0, v1, v2 }, [x0], #12 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.2s { v0, v1, v2 }, [x0], #12 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5864,12 +5864,12 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(ptr % } define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_ld3r: -; CHECK: ; 
%bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld3r.2s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld3r.2s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5887,11 +5887,11 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32. define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.2d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.2d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5906,12 +5906,12 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(pt } define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld3r.2d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld3r.2d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5928,11 +5928,11 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(pt declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0(ptr) nounwind readonly define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: 
test_v1f64_post_imm_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld3r.1d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld3r.1d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5947,12 +5947,12 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(pt } define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_ld3r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld3r.1d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld3r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld3r.1d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld3r: ; CHECK-GISEL: ; %bb.0: @@ -5970,11 +5970,11 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.16b { v0, v1, v2, v3 }, [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.16b { v0, v1, v2, v3 }, [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -5989,11 +5989,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r( } define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.16b { v0, v1, v2, v3 }, [x0], x2 -; CHECK-NEXT: str x0, 
[x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.16b { v0, v1, v2, v3 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6011,11 +6011,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.8b { v0, v1, v2, v3 }, [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.8b { v0, v1, v2, v3 }, [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6030,11 +6030,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(ptr % } define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.8b { v0, v1, v2, v3 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.8b { v0, v1, v2, v3 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6052,11 +6052,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8. 
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.8h { v0, v1, v2, v3 }, [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.8h { v0, v1, v2, v3 }, [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6071,12 +6071,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r( } define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld4r.8h { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld4r.8h { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6094,11 +6094,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.4h { v0, v1, v2, v3 }, [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.4h { v0, v1, v2, v3 }, [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6113,12 +6113,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r( } define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { 
-; CHECK-LABEL: test_v4i16_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld4r.4h { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld4r.4h { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6136,11 +6136,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6155,12 +6155,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r( } define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6177,11 +6177,11 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r( declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } 
@test_v2i32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6196,12 +6196,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r( } define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6219,11 +6219,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6238,12 +6238,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r( } define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_ld4r: -; CHECK: 
; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6260,11 +6260,11 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r( declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6279,12 +6279,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r( } define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6302,11 +6302,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(ptr %A, ptr %ptr) 
nounwind { -; CHECK-LABEL: test_v4f32_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6321,12 +6321,12 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i } define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6343,11 +6343,11 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_r declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0(ptr) nounwind readonly define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6362,12 +6362,12 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i } define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } 
@test_v2f32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6385,11 +6385,11 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6404,12 +6404,12 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po } define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6426,11 +6426,11 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } 
@llvm.aarch64.neon.ld4r.v2f64.p0(ptr) nounwind readonly define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6445,12 +6445,12 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po } define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_ld4r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld4r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld4r: ; CHECK-GISEL: ; %bb.0: @@ -6468,13 +6468,13 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.b { v0, v1 }[0], [x0], #2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.b { v0, v1 }[0], [x0], 
#2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6491,13 +6491,13 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, < } define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.b { v0, v1 }[0], [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.b { v0, v1 }[0], [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6517,13 +6517,13 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>, define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.b { v0, v1 }[0], [x0], #2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.b { v0, v1 }[0], [x0], #2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6540,13 +6540,13 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x } define { <8 x i8>, <8 
x i8> } @test_v8i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.b { v0, v1 }[0], [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.b { v0, v1 }[0], [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6566,13 +6566,13 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8>, <8 x define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.h { v0, v1 }[0], [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6589,14 +6589,14 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, < } define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: 
lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.h { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6616,13 +6616,13 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>, define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.h { v0, v1 }[0], [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6639,14 +6639,14 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, < } define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; 
SDAG-LABEL: test_v4i16_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.h { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6666,13 +6666,13 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16>, define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.s { v0, v1 }[0], [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6689,14 +6689,14 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, < } define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q0 
killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.s { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6716,13 +6716,13 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>, define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.s { v0, v1 }[0], [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6739,14 +6739,14 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, < } define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.s { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld2lane: ; 
CHECK-GISEL: ; %bb.0: @@ -6766,13 +6766,13 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32>, define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.d { v0, v1 }[0], [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6789,14 +6789,14 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, < } define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.d { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6816,13 +6816,13 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>, define { <1 x i64>, <1 x i64> } 
@test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.d { v0, v1 }[0], [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6838,15 +6838,15 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, < ret { <1 x i64>, <1 x i64> } %ld2 } -define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind { +; SDAG-LABEL: test_v1i64_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.d { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6866,13 +6866,13 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64>, define { <4 x float>, <4 x float> } 
@test_v4f32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.s { v0, v1 }[0], [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6889,14 +6889,14 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %pt } define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.s { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6916,13 +6916,13 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x fl define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: 
; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.s { v0, v1 }[0], [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6939,14 +6939,14 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %pt } define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.s { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6966,13 +6966,13 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x fl define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], #16 
-; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.d { v0, v1 }[0], [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -6989,14 +6989,14 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr % } define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.d { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -7016,13 +7016,13 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed 
$q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.d { v0, v1 }[0], [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -7039,14 +7039,14 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr % } define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_ld2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ld2.d { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld2lane: ; CHECK-GISEL: ; %bb.0: @@ -7066,14 +7066,14 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 
killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7091,14 +7091,14 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, } define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7119,14 +7119,14 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0( define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; 
SDAG-LABEL: test_v8i8_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7144,14 +7144,14 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr } define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7172,14 +7172,14 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: 
def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7197,15 +7197,15 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, } define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7226,14 +7226,14 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0( define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { -; 
CHECK-LABEL: test_v4i16_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7251,15 +7251,15 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, } define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7280,14 +7280,14 @@ declare 
{ <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0( define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7305,15 +7305,15 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, } define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed 
$q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7334,14 +7334,14 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0( define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7359,15 +7359,15 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, } define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld3lane: +; SDAG: ; %bb.0: 
+; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7388,14 +7388,14 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0( define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7413,15 +7413,15 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, } define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; 
CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7442,14 +7442,14 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0( define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7467,15 +7467,15 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, } define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { -; CHECK-LABEL: 
test_v1i64_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7496,14 +7496,14 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0( define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7521,15 +7521,15 @@ define { <4 x 
float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(pt } define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7550,14 +7550,14 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 
killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7575,15 +7575,15 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(pt } define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7604,14 +7604,14 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 -; CHECK-NEXT: str x0, [x1] -; 
CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7629,15 +7629,15 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane } define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7658,14 +7658,14 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane. 
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7683,15 +7683,15 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane } define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_ld3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: 
ld3.d { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld3lane: ; CHECK-GISEL: ; %bb.0: @@ -7712,15 +7712,15 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane. define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -7739,15 +7739,15 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la } define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed 
$q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -7769,15 +7769,15 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lan define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; 
CHECK-GISEL-LABEL: test_v8i8_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -7796,15 +7796,15 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(pt } define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -7826,15 +7826,15 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -7853,16 +7853,16 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la } define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: 
test_v8i16_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -7884,15 +7884,15 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lan define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -7911,16 +7911,16 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la } define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 
killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -7942,15 +7942,15 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lan define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: 
test_v4i32_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -7969,16 +7969,16 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la } define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8000,15 +8000,15 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lan define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 
-; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8027,16 +8027,16 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la } define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: str x0, 
[x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8058,15 +8058,15 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lan define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8085,16 +8085,16 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la } define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 
def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8116,15 +8116,15 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lan define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; 
; CHECK-GISEL-LABEL: test_v1i64_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8143,16 +8143,16 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la } define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8174,15 +8174,15 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lan define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 
killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8201,16 +8201,16 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i } define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.s 
{ v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8232,15 +8232,15 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8259,16 +8259,16 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i } define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8290,15 +8290,15 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 
def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8317,16 +8317,16 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po } define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8348,15 +8348,15 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8375,16 +8375,16 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po } define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_ld4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_ld4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; 
kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_ld4lane: ; CHECK-GISEL: ; %bb.0: @@ -8406,12 +8406,12 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.16b { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.16b { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8427,12 +8427,12 @@ define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C } define ptr @test_v16i8_post_reg_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.16b { v0, v1 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.16b { v0, v1 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8451,12 +8451,12 @@ declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) define ptr 
@test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st2.8b { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st2.8b { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8472,12 +8472,12 @@ define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) n } define ptr @test_v8i8_post_reg_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st2.8b { v0, v1 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st2.8b { v0, v1 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8496,12 +8496,12 @@ declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.8h { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def 
$q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.8h { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8517,13 +8517,13 @@ define ptr @test_v8i16_post_imm_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C } define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.8h { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.8h { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8542,12 +8542,12 @@ declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) define ptr @test_v4i16_post_imm_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st2.4h { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st2.4h { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8563,13 +8563,13 @@ define ptr @test_v4i16_post_imm_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C } define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 
x i16> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st2.4h { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st2.4h { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8588,12 +8588,12 @@ declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) define ptr @test_v4i32_post_imm_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.4s { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.4s { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8609,13 +8609,13 @@ define ptr @test_v4i32_post_imm_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C } define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.4s { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_st2: +; SDAG: ; %bb.0: +; 
SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.4s { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8634,12 +8634,12 @@ declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) define ptr @test_v2i32_post_imm_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st2.2s { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st2.2s { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8655,13 +8655,13 @@ define ptr @test_v2i32_post_imm_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C } define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st2.2s { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st2.2s { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8680,12 +8680,12 @@ declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, 
ptr) define ptr @test_v2i64_post_imm_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.2d { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.2d { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8701,13 +8701,13 @@ define ptr @test_v2i64_post_imm_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C } define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.2d { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.2d { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8726,12 +8726,12 @@ declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) define ptr @test_v1i64_post_imm_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.1d { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: 
test_v1i64_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.1d { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8747,13 +8747,13 @@ define ptr @test_v1i64_post_imm_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C } define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.1d { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.1d { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8772,12 +8772,12 @@ declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) define ptr @test_v4f32_post_imm_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.4s { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.4s { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8793,13 +8793,13 @@ define ptr @test_v4f32_post_imm_st2(ptr %A, ptr 
%ptr, <4 x float> %B, <4 x float } define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.4s { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.4s { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8818,12 +8818,12 @@ declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr) define ptr @test_v2f32_post_imm_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st2.2s { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st2.2s { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8839,13 +8839,13 @@ define ptr @test_v2f32_post_imm_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float } define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; 
CHECK-NEXT: st2.2s { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st2.2s { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8864,12 +8864,12 @@ declare void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float>, <2 x float>, ptr) define ptr @test_v2f64_post_imm_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.2d { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.2d { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8885,13 +8885,13 @@ define ptr @test_v2f64_post_imm_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub } define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.2d { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.2d { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: 
test_v2f64_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8910,12 +8910,12 @@ declare void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double>, <2 x double>, ptr) define ptr @test_v1f64_post_imm_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.1d { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.1d { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_st2: ; CHECK-GISEL: ; %bb.0: @@ -8931,13 +8931,13 @@ define ptr @test_v1f64_post_imm_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub } define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_st2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.1d { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_st2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.1d { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_st2: ; CHECK-GISEL: ; %bb.0: @@ -8956,13 +8956,13 @@ declare void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double>, <1 x double>, ptr) define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 
killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.16b { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.16b { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -8979,13 +8979,13 @@ define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C } define ptr @test_v16i8_post_reg_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.16b { v0, v1, v2 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.16b { v0, v1, v2 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9005,13 +9005,13 @@ declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, pt define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; 
kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st3.8b { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st3.8b { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9028,13 +9028,13 @@ define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, < } define ptr @test_v8i8_post_reg_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st3.8b { v0, v1, v2 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st3.8b { v0, v1, v2 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9054,13 +9054,13 @@ declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; 
CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.8h { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.8h { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9077,14 +9077,14 @@ define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C } define ptr @test_v8i16_post_reg_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.8h { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.8h { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9104,13 +9104,13 @@ declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, pt define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def 
$d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st3.4h { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st3.4h { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9127,14 +9127,14 @@ define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C } define ptr @test_v4i16_post_reg_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st3.4h { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st3.4h { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9154,13 +9154,13 @@ declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, pt define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed 
$q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9177,14 +9177,14 @@ define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C } define ptr @test_v4i32_post_reg_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9204,13 +9204,13 @@ declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, pt define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 
killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9227,14 +9227,14 @@ define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C } define ptr @test_v2i32_post_reg_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9254,13 +9254,13 @@ declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, pt define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 
killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9277,14 +9277,14 @@ define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C } define ptr @test_v2i64_post_reg_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9304,13 +9304,13 @@ declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, pt define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: 
def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9327,14 +9327,14 @@ define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C } define ptr @test_v1i64_post_reg_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9354,13 +9354,13 @@ declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, pt define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; 
CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9377,14 +9377,14 @@ define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float } define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9403,14 +9403,14 @@ define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float declare void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, ptr) -define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { -; CHECK-LABEL: 
test_v2f32_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { +; SDAG-LABEL: test_v2f32_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9427,14 +9427,14 @@ define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float } define ptr @test_v2f32_post_reg_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9454,13 +9454,13 @@ declare void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float>, <2 x float>, <2 x 
floa define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9477,14 +9477,14 @@ define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub } define ptr @test_v2f64_post_reg_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9504,13 +9504,13 @@ declare void @llvm.aarch64.neon.st3.v2f64.p0(<2 x 
double>, <2 x double>, <2 x do define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_st3: ; CHECK-GISEL: ; %bb.0: @@ -9527,14 +9527,14 @@ define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub } define ptr @test_v1f64_post_reg_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_st3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_st3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_st3: ; CHECK-GISEL: ; %bb.0: @@ -9554,14 +9554,14 @@ declare void 
@llvm.aarch64.neon.st3.v1f64.p0(<1 x double>, <1 x double>, <1 x do define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -9579,14 +9579,14 @@ define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C } define ptr @test_v16i8_post_reg_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 
killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -9607,14 +9607,14 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <1 define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -9632,14 +9632,14 @@ define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, < } define ptr @test_v8i8_post_reg_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def 
$d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -9660,14 +9660,14 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -9685,15 +9685,15 @@ define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C } 
define ptr @test_v8i16_post_reg_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -9714,14 +9714,14 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 
killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -9739,15 +9739,15 @@ define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C } define ptr @test_v4i16_post_reg_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -9768,14 +9768,14 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<4 define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -9793,15 +9793,15 @@ define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C } define ptr @test_v4i32_post_reg_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_st4: 
; CHECK-GISEL: ; %bb.0: @@ -9822,14 +9822,14 @@ declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<4 define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -9847,15 +9847,15 @@ define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C } define ptr @test_v2i32_post_reg_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; 
kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -9876,14 +9876,14 @@ declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -9901,15 +9901,15 @@ define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C } define ptr @test_v2i64_post_reg_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -9930,14 +9930,14 @@ declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2 define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 
+; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -9955,15 +9955,15 @@ define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C } define ptr @test_v1i64_post_reg_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -9984,14 +9984,14 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1 define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], 
#64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -10009,15 +10009,15 @@ define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float } define ptr @test_v4f32_post_reg_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -10038,14 +10038,14 @@ declare void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float>, <4 x float>, <4 x floa define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x 
float> %D, <2 x float> %E) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -10063,15 +10063,15 @@ define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float } define ptr @test_v2f32_post_reg_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed 
$d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -10092,14 +10092,14 @@ declare void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float>, <2 x float>, <2 x floa define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -10117,15 +10117,15 @@ define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub } define ptr @test_v2f64_post_reg_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 
killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -10146,14 +10146,14 @@ declare void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double>, <2 x double>, <2 x do define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_st4: ; CHECK-GISEL: ; %bb.0: @@ -10171,15 +10171,15 @@ define ptr @test_v1f64_post_imm_st4(ptr %A, ptr 
%ptr, <1 x double> %B, <1 x doub } define ptr @test_v1f64_post_reg_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_st4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_st4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_st4: ; CHECK-GISEL: ; %bb.0: @@ -10200,12 +10200,12 @@ declare void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double>, <1 x double>, <1 x do define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.16b { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.16b { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10221,12 
+10221,12 @@ define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> } define ptr @test_v16i8_post_reg_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.16b { v0, v1 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.16b { v0, v1 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10245,12 +10245,12 @@ declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.8b { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.8b { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10266,12 +10266,12 @@ define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) } define ptr @test_v8i8_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.8b { v0, v1 
}, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.8b { v0, v1 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10290,12 +10290,12 @@ declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.8h { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.8h { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10311,13 +10311,13 @@ define ptr @test_v8i16_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> } define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.8h { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.8h { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ 
-10336,12 +10336,12 @@ declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) define ptr @test_v4i16_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.4h { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.4h { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10357,13 +10357,13 @@ define ptr @test_v4i16_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> } define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.4h { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.4h { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10382,12 +10382,12 @@ declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) define ptr @test_v4i32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 
killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.4s { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.4s { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10403,13 +10403,13 @@ define ptr @test_v4i32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> } define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.4s { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.4s { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10428,12 +10428,12 @@ declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) define ptr @test_v2i32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.2s { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.2s { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; 
CHECK-GISEL-LABEL: test_v2i32_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10449,13 +10449,13 @@ define ptr @test_v2i32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> } define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.2s { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.2s { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10474,12 +10474,12 @@ declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) define ptr @test_v2i64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.2d { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.2d { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10495,13 +10495,13 @@ define ptr @test_v2i64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> } define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: 
lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.2d { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.2d { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10520,12 +10520,12 @@ declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) define ptr @test_v1i64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.1d { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.1d { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10541,13 +10541,13 @@ define ptr @test_v1i64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> } define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.1d { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def 
$d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.1d { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10566,12 +10566,12 @@ declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) define ptr @test_v4f32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.4s { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.4s { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10587,13 +10587,13 @@ define ptr @test_v4f32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo } define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.4s { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.4s { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10612,12 +10612,12 @@ declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr) define ptr @test_v2f32_post_imm_st1x2(ptr 
%A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.2s { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.2s { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10633,13 +10633,13 @@ define ptr @test_v2f32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo } define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.2s { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.2s { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10658,12 +10658,12 @@ declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr) define ptr @test_v2f64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.2d { v0, v1 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: 
test_v2f64_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.2d { v0, v1 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10679,13 +10679,13 @@ define ptr @test_v2f64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do } define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st1.2d { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st1.2d { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10704,12 +10704,12 @@ declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr) define ptr @test_v1f64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.1d { v0, v1 }, [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.1d { v0, v1 }, [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10725,13 +10725,13 @@ 
define ptr @test_v1f64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do } define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_st1x2: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-NEXT: st1.1d { v0, v1 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_st1x2: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; SDAG-NEXT: st1.1d { v0, v1 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_st1x2: ; CHECK-GISEL: ; %bb.0: @@ -10750,13 +10750,13 @@ declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr) define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.16b { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.16b { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -10773,13 +10773,13 @@ define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> } define ptr @test_v16i8_post_reg_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 
x i8> %C, <16 x i8> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.16b { v0, v1, v2 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.16b { v0, v1, v2 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -10799,13 +10799,13 @@ declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.8b { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.8b { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -10822,13 +10822,13 @@ define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, } define ptr @test_v8i8_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) 
nounwind { -; CHECK-LABEL: test_v8i8_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.8b { v0, v1, v2 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.8b { v0, v1, v2 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -10848,13 +10848,13 @@ declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.8h { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.8h { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -10871,14 +10871,14 @@ define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> } define ptr @test_v8i16_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { -; CHECK-LABEL: 
test_v8i16_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.8h { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.8h { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -10898,13 +10898,13 @@ declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.4h { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.4h { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -10921,14 +10921,14 @@ define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> } define ptr @test_v4i16_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) 
nounwind { -; CHECK-LABEL: test_v4i16_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.4h { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.4h { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -10948,13 +10948,13 @@ declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -10971,14 +10971,14 @@ define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> } define ptr @test_v4i32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> 
%C, <4 x i32> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -10998,13 +10998,13 @@ declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11021,14 +11021,14 @@ define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> } define ptr @test_v2i32_post_reg_st1x3(ptr %A, ptr 
%ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11048,13 +11048,13 @@ declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11071,14 +11071,14 @@ define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> } define ptr 
@test_v2i64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11098,13 +11098,13 @@ declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11121,14 +11121,14 @@ define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> 
%B, <1 x i64> } define ptr @test_v1i64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11148,13 +11148,13 @@ declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11171,14 +11171,14 @@ define ptr 
@test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo } define ptr @test_v4f32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11198,13 +11198,13 @@ declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x fl define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_st1x3: ; CHECK-GISEL: ; 
%bb.0: @@ -11221,14 +11221,14 @@ define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo } define ptr @test_v2f32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11248,13 +11248,13 @@ declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x fl define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: 
test_v2f64_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11271,14 +11271,14 @@ define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x do } define ptr @test_v2f64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11298,13 +11298,13 @@ declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.1d { v0, v1, v2 }, [x0], 
#24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11321,14 +11321,14 @@ define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x do } define ptr @test_v1f64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_st1x3: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_st1x3: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; SDAG-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_st1x3: ; CHECK-GISEL: ; %bb.0: @@ -11348,14 +11348,14 @@ declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 
killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11373,14 +11373,14 @@ define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> } define ptr @test_v16i8_post_reg_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11401,14 +11401,14 @@ declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; 
CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11426,14 +11426,14 @@ define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, } define ptr @test_v8i8_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11454,14 +11454,14 @@ declare void 
@llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11479,15 +11479,15 @@ define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> } define ptr @test_v8i16_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11508,14 +11508,14 @@ declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11533,15 +11533,15 @@ define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> } define ptr @test_v4i16_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; 
CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11562,14 +11562,14 @@ declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,< define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; 
; CHECK-GISEL-LABEL: test_v4i32_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11587,15 +11587,15 @@ define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> } define ptr @test_v4i32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11616,14 +11616,14 @@ declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,< define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 -; 
CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11641,15 +11641,15 @@ define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> } define ptr @test_v2i32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11670,14 +11670,14 @@ declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 
x i64> %E) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11695,15 +11695,15 @@ define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> } define ptr @test_v2i64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11724,14 +11724,14 @@ declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,< define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11749,15 +11749,15 @@ define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> } define ptr @test_v1i64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 
def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11778,14 +11778,14 @@ declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,< define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11803,15 +11803,15 @@ define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x 
float> %B, <4 x flo } define ptr @test_v4f32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11832,14 +11832,14 @@ declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x fl define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def 
$d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11857,15 +11857,15 @@ define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo } define ptr @test_v2f32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11886,14 +11886,14 @@ declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x fl define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 
killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11911,15 +11911,15 @@ define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x do } define ptr @test_v2f64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 +; SDAG-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11940,14 +11940,14 @@ declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11965,15 +11965,15 @@ define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x do } define ptr @test_v1f64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_st1x4: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed 
$d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_st1x4: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; SDAG-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_st1x4: ; CHECK-GISEL: ; %bb.0: @@ -11993,12 +11993,12 @@ define ptr @test_v1f64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x do declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, ptr) define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.b { v0, v1 }[0], [x0], #2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.b { v0, v1 }[0], [x0], #2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12014,12 +12014,12 @@ define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 } define ptr @test_v16i8_post_reg_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.b { v0, v1 }[0], 
[x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.b { v0, v1 }[0], [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12038,12 +12038,12 @@ declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.b { v0, v1 }[0], [x0], #2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.b { v0, v1 }[0], [x0], #2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12059,12 +12059,12 @@ define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % } define ptr @test_v8i8_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.b { v0, v1 }[0], [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.b { v0, v1 }[0], [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12083,12 +12083,12 @@ declare void 
@llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr) define ptr @test_v8i16_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.h { v0, v1 }[0], [x0], #4 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.h { v0, v1 }[0], [x0], #4 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12104,13 +12104,13 @@ define ptr @test_v8i16_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 } define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.h { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.h { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12129,12 +12129,12 @@ declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) define ptr @test_v4i16_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed 
$d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.h { v0, v1 }[0], [x0], #4 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.h { v0, v1 }[0], [x0], #4 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12150,13 +12150,13 @@ define ptr @test_v4i16_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 } define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.h { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.h { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12175,12 +12175,12 @@ declare void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr) define ptr @test_v4i32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], #8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.s { v0, v1 }[0], [x0], #8 
+; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12196,13 +12196,13 @@ define ptr @test_v4i32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 } define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.s { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12221,12 +12221,12 @@ declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) define ptr @test_v2i32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], #8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.s { v0, v1 }[0], [x0], #8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12242,13 +12242,13 @@ define ptr @test_v2i32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 } define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { -; CHECK-LABEL: 
test_v2i32_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.s { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12267,12 +12267,12 @@ declare void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr) define ptr @test_v2i64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.d { v0, v1 }[0], [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12288,13 +12288,13 @@ define ptr @test_v2i64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 } define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_st2lane: +; SDAG: ; 
%bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.d { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12313,12 +12313,12 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) define ptr @test_v1i64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.d { v0, v1 }[0], [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12334,13 +12334,13 @@ define ptr @test_v1i64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 } define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.d { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12359,12 +12359,12 
@@ declare void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr) define ptr @test_v4f32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], #8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.s { v0, v1 }[0], [x0], #8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12380,13 +12380,13 @@ define ptr @test_v4f32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f } define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.s { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12405,12 +12405,12 @@ declare void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float>, <4 x float>, i64, define ptr @test_v2f32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; 
CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], #8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.s { v0, v1 }[0], [x0], #8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12426,13 +12426,13 @@ define ptr @test_v2f32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f } define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.s { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12451,12 +12451,12 @@ declare void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float>, <2 x float>, i64, define ptr @test_v2f64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 
+; SDAG-NEXT: st2.d { v0, v1 }[0], [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12472,13 +12472,13 @@ define ptr @test_v2f64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x } define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.d { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12497,12 +12497,12 @@ declare void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double>, <2 x double>, i64 define ptr @test_v1f64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.d { v0, v1 }[0], [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12518,13 +12518,13 @@ define ptr @test_v1f64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x } define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> 
%B, <1 x double> %C, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_st2lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_st2lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; SDAG-NEXT: st2.d { v0, v1 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_st2lane: ; CHECK-GISEL: ; %bb.0: @@ -12543,13 +12543,13 @@ declare void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double>, <1 x double>, i64 define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12565,14 +12565,14 @@ define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 ret ptr %tmp } -define ptr @test_v16i8_post_reg_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_st3lane: -; CHECK: ; %bb.0: -; 
CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 -; CHECK-NEXT: ret +define ptr @test_v16i8_post_reg_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { +; SDAG-LABEL: test_v16i8_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12592,13 +12592,13 @@ declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8> define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12615,13 +12615,13 @@ define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % } define ptr @test_v8i8_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, 
<8 x i8> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12641,13 +12641,13 @@ declare void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i6 define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12664,14 +12664,14 @@ define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 } define ptr @test_v8i16_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, 
i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12691,13 +12691,13 @@ declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16> define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12714,14 +12714,14 @@ define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 } define ptr @test_v4i16_post_reg_st3lane(ptr %A, 
ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12741,13 +12741,13 @@ declare void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16> define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12764,14 +12764,14 @@ define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x 
i32 } define ptr @test_v4i32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12791,13 +12791,13 @@ declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32> define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12814,14 +12814,14 @@ define ptr 
@test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 } define ptr @test_v2i32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12841,13 +12841,13 @@ declare void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32> define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_st3lane: ; 
CHECK-GISEL: ; %bb.0: @@ -12864,14 +12864,14 @@ define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 } define ptr @test_v2i64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12891,13 +12891,13 @@ declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64> define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 +; SDAG-NEXT: ret ; 
; CHECK-GISEL-LABEL: test_v1i64_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12914,14 +12914,14 @@ define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 } define ptr @test_v1i64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12941,13 +12941,13 @@ declare void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64> define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; 
SDAG-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12964,14 +12964,14 @@ define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f } define ptr @test_v4f32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -12991,13 +12991,13 @@ declare void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; 
SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -13014,14 +13014,14 @@ define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f } define ptr @test_v2f32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -13041,13 +13041,13 @@ declare void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; 
SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -13064,14 +13064,14 @@ define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x } define ptr @test_v2f64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -13091,13 +13091,13 @@ declare void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double>, <2 x double>, <2 define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_st3lane: +; SDAG: ; %bb.0: +; 
SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -13114,14 +13114,14 @@ define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x } define ptr @test_v1f64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_st3lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_st3lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; SDAG-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_st3lane: ; CHECK-GISEL: ; %bb.0: @@ -13141,14 +13141,14 @@ declare void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double>, <1 x double>, <1 define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { -; CHECK-LABEL: test_v16i8_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 
killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13166,14 +13166,14 @@ define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 } define ptr @test_v16i8_post_reg_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v16i8_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13194,14 +13194,14 @@ declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8> define ptr @test_v8i8_post_imm_st4lane(ptr %A, 
ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { -; CHECK-LABEL: test_v8i8_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13219,14 +13219,14 @@ define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % } define ptr @test_v8i8_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i8_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13247,14 +13247,14 @@ declare void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { -; CHECK-LABEL: test_v8i16_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13272,15 +13272,15 @@ define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 } define ptr @test_v8i16_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v8i16_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 
killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13301,14 +13301,14 @@ declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16> define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { -; CHECK-LABEL: test_v4i16_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13326,15 +13326,15 @@ define ptr 
@test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 } define ptr @test_v4i16_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i16_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13355,14 +13355,14 @@ declare void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16> define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { -; CHECK-LABEL: test_v4i32_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: 
; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13380,15 +13380,15 @@ define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 } define ptr @test_v4i32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v4i32_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13409,14 +13409,14 @@ declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32> define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { -; CHECK-LABEL: test_v2i32_post_imm_st4lane: -; 
CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13434,15 +13434,15 @@ define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 } define ptr @test_v2i32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i32_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: 
def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13463,14 +13463,14 @@ declare void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32> define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { -; CHECK-LABEL: test_v2i64_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13488,15 +13488,15 @@ define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 } define ptr @test_v2i64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2i64_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; 
kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13517,14 +13517,14 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64> define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { -; CHECK-LABEL: test_v1i64_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13542,15 +13542,15 @@ define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 } 
define ptr @test_v1i64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v1i64_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1i64_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1i64_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13571,14 +13571,14 @@ declare void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64> define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { -; CHECK-LABEL: test_v4f32_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 
+; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13596,15 +13596,15 @@ define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f } define ptr @test_v4f32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v4f32_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13625,14 +13625,14 @@ declare void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { -; CHECK-LABEL: test_v2f32_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 
killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13650,15 +13650,15 @@ define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f } define ptr @test_v2f32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f32_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 
def $q0_q1_q2_q3 +; SDAG-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13679,14 +13679,14 @@ declare void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { -; CHECK-LABEL: test_v2f64_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13704,15 +13704,15 @@ define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x } define ptr @test_v2f64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v2f64_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $q0 
killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13733,14 +13733,14 @@ declare void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double>, <2 x double>, <2 define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { -; CHECK-LABEL: test_v1f64_post_imm_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_imm_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_imm_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13758,15 +13758,15 @@ define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x } 
define ptr @test_v1f64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { -; CHECK-LABEL: test_v1f64_post_reg_st4lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v1f64_post_reg_st4lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; SDAG-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v1f64_post_reg_st4lane: ; CHECK-GISEL: ; %bb.0: @@ -13791,12 +13791,6 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(ptr %bar, ptr %ptr) { ; CHECK-NEXT: ld1r.16b { v0 }, [x0], #1 ; CHECK-NEXT: str x0, [x1] ; CHECK-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1r: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], #1 -; CHECK-GISEL-NEXT: str x0, [x1] -; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1 @@ -13825,12 +13819,6 @@ define <16 x i8> @test_v16i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { ; CHECK-NEXT: ld1r.16b { v0 }, [x0], x2 ; CHECK-NEXT: str x0, [x1] ; CHECK-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1r: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], x2 -; CHECK-GISEL-NEXT: str 
x0, [x1] -; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1 @@ -13859,12 +13847,6 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(ptr %bar, ptr %ptr) { ; CHECK-NEXT: ld1r.8b { v0 }, [x0], #1 ; CHECK-NEXT: str x0, [x1] ; CHECK-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1r: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0], #1 -; CHECK-GISEL-NEXT: str x0, [x1] -; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <8 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1 @@ -13885,12 +13867,6 @@ define <8 x i8> @test_v8i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { ; CHECK-NEXT: ld1r.8b { v0 }, [x0], x2 ; CHECK-NEXT: str x0, [x1] ; CHECK-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1r: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0], x2 -; CHECK-GISEL-NEXT: str x0, [x1] -; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <8 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1 @@ -13906,11 +13882,11 @@ define <8 x i8> @test_v8i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { } define <8 x i16> @test_v8i16_post_imm_ld1r(ptr %bar, ptr %ptr) { -; CHECK-LABEL: test_v8i16_post_imm_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.8h { v0 }, [x0], #2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.8h { v0 }, [x0], #2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -13933,12 +13909,12 @@ define <8 x i16> @test_v8i16_post_imm_ld1r(ptr %bar, ptr %ptr) { } define <8 x i16> @test_v8i16_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i16_post_reg_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld1r.8h { v0 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; 
CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld1r.8h { v0 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -13961,11 +13937,11 @@ define <8 x i16> @test_v8i16_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { } define <4 x i16> @test_v4i16_post_imm_ld1r(ptr %bar, ptr %ptr) { -; CHECK-LABEL: test_v4i16_post_imm_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.4h { v0 }, [x0], #2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.4h { v0 }, [x0], #2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -13984,12 +13960,12 @@ define <4 x i16> @test_v4i16_post_imm_ld1r(ptr %bar, ptr %ptr) { } define <4 x i16> @test_v4i16_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i16_post_reg_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld1r.4h { v0 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld1r.4h { v0 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14008,11 +13984,11 @@ define <4 x i16> @test_v4i16_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { } define <4 x i32> @test_v4i32_post_imm_ld1r(ptr %bar, ptr %ptr) { -; CHECK-LABEL: test_v4i32_post_imm_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.4s { v0 }, [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_imm_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.4s { v0 }, [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14031,12 +14007,12 @@ define <4 x i32> @test_v4i32_post_imm_ld1r(ptr %bar, ptr 
%ptr) { } define <4 x i32> @test_v4i32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4i32_post_reg_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1r.4s { v0 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1r.4s { v0 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14055,11 +14031,11 @@ define <4 x i32> @test_v4i32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { } define <2 x i32> @test_v2i32_post_imm_ld1r(ptr %bar, ptr %ptr) { -; CHECK-LABEL: test_v2i32_post_imm_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.2s { v0 }, [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.2s { v0 }, [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14076,12 +14052,12 @@ define <2 x i32> @test_v2i32_post_imm_ld1r(ptr %bar, ptr %ptr) { } define <2 x i32> @test_v2i32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i32_post_reg_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1r.2s { v0 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1r.2s { v0 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14098,11 +14074,11 @@ define <2 x i32> @test_v2i32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { } define <2 x i64> @test_v2i64_post_imm_ld1r(ptr %bar, ptr %ptr) { -; CHECK-LABEL: test_v2i64_post_imm_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.2d { v0 }, [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld1r: +; SDAG: ; 
%bb.0: +; SDAG-NEXT: ld1r.2d { v0 }, [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14119,12 +14095,12 @@ define <2 x i64> @test_v2i64_post_imm_ld1r(ptr %bar, ptr %ptr) { } define <2 x i64> @test_v2i64_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2i64_post_reg_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1r.2d { v0 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1r.2d { v0 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14141,11 +14117,11 @@ define <2 x i64> @test_v2i64_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { } define <4 x float> @test_v4f32_post_imm_ld1r(ptr %bar, ptr %ptr) { -; CHECK-LABEL: test_v4f32_post_imm_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.4s { v0 }, [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.4s { v0 }, [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14164,12 +14140,12 @@ define <4 x float> @test_v4f32_post_imm_ld1r(ptr %bar, ptr %ptr) { } define <4 x float> @test_v4f32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v4f32_post_reg_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1r.4s { v0 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1r.4s { v0 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14188,11 +14164,11 @@ define <4 x float> @test_v4f32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { } define <2 
x float> @test_v2f32_post_imm_ld1r(ptr %bar, ptr %ptr) { -; CHECK-LABEL: test_v2f32_post_imm_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.2s { v0 }, [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.2s { v0 }, [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14209,12 +14185,12 @@ define <2 x float> @test_v2f32_post_imm_ld1r(ptr %bar, ptr %ptr) { } define <2 x float> @test_v2f32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f32_post_reg_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1r.2s { v0 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1r.2s { v0 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14231,11 +14207,11 @@ define <2 x float> @test_v2f32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { } define <2 x double> @test_v2f64_post_imm_ld1r(ptr %bar, ptr %ptr) { -; CHECK-LABEL: test_v2f64_post_imm_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.2d { v0 }, [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.2d { v0 }, [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14252,12 +14228,12 @@ define <2 x double> @test_v2f64_post_imm_ld1r(ptr %bar, ptr %ptr) { } define <2 x double> @test_v2f64_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v2f64_post_reg_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1r.2d { v0 }, [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; 
SDAG-NEXT: ld1r.2d { v0 }, [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld1r: ; CHECK-GISEL: ; %bb.0: @@ -14274,11 +14250,11 @@ define <2 x double> @test_v2f64_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { } define <16 x i8> @test_v16i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <16 x i8> %A) { -; CHECK-LABEL: test_v16i8_post_imm_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.b { v0 }[1], [x0], #1 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.b { v0 }[1], [x0], #1 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14295,11 +14271,11 @@ define <16 x i8> @test_v16i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <16 x i8> %A) } define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16 x i8> %A) { -; CHECK-LABEL: test_v16i8_post_reg_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.b { v0 }[1], [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.b { v0 }[1], [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14316,13 +14292,13 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16 } define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) { -; CHECK-LABEL: test_v8i8_post_imm_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ld1.b { v0 }[1], [x0], #1 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: ld1.b { v0 }[1], [x0], #1 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; SDAG-NEXT: ret ; ; 
CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14341,13 +14317,13 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) { } define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i8> %A) { -; CHECK-LABEL: test_v8i8_post_reg_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ld1.b { v0 }[1], [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: ld1.b { v0 }[1], [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14366,11 +14342,11 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i } define <8 x i16> @test_v8i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i16> %A) { -; CHECK-LABEL: test_v8i16_post_imm_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.h { v0 }[1], [x0], #2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_imm_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.h { v0 }[1], [x0], #2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14387,12 +14363,12 @@ define <8 x i16> @test_v8i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i16> %A) } define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i16> %A) { -; CHECK-LABEL: test_v8i16_post_reg_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ld1.h { v0 }[1], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i16_post_reg_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ld1.h { v0 }[1], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: 
test_v8i16_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14409,13 +14385,13 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x } define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A) { -; CHECK-LABEL: test_v4i16_post_imm_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ld1.h { v0 }[1], [x0], #2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_imm_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: ld1.h { v0 }[1], [x0], #2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14434,14 +14410,14 @@ define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A) } define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x i16> %A) { -; CHECK-LABEL: test_v4i16_post_reg_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ld1.h { v0 }[1], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: ld1.h { v0 }[1], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14460,11 +14436,11 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x } define <4 x i32> @test_v4i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i32> %A) { -; CHECK-LABEL: test_v4i32_post_imm_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: 
ret +; SDAG-LABEL: test_v4i32_post_imm_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.s { v0 }[1], [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14481,12 +14457,12 @@ define <4 x i32> @test_v4i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i32> %A) } define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x i32> %A) { -; CHECK-LABEL: test_v4i32_post_reg_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i32_post_reg_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.s { v0 }[1], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14503,13 +14479,13 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x } define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A) { -; CHECK-LABEL: test_v2i32_post_imm_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_imm_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: ld1.s { v0 }[1], [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14528,14 +14504,14 @@ define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A) } define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x i32> %A) { -; CHECK-LABEL: test_v2i32_post_reg_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ld1.s { 
v0 }[1], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i32_post_reg_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: ld1.s { v0 }[1], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14554,11 +14530,11 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x } define <2 x i64> @test_v2i64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i64> %A) { -; CHECK-LABEL: test_v2i64_post_imm_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.d { v0 }[1], [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_imm_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.d { v0 }[1], [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14575,12 +14551,12 @@ define <2 x i64> @test_v2i64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i64> %A) } define <2 x i64> @test_v2i64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x i64> %A) { -; CHECK-LABEL: test_v2i64_post_reg_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.d { v0 }[1], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2i64_post_reg_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.d { v0 }[1], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14597,11 +14573,11 @@ define <2 x i64> @test_v2i64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x } define <4 x float> @test_v4f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x float> %A) { -; CHECK-LABEL: test_v4f32_post_imm_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4 -; CHECK-NEXT: str x0, [x1] 
-; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_imm_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.s { v0 }[1], [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14618,12 +14594,12 @@ define <4 x float> @test_v4f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x float> } define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x float> %A) { -; CHECK-LABEL: test_v4f32_post_reg_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4f32_post_reg_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ld1.s { v0 }[1], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14640,13 +14616,13 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 } define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float> %A) { -; CHECK-LABEL: test_v2f32_post_imm_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_imm_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: ld1.s { v0 }[1], [x0], #4 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14665,14 +14641,14 @@ define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float> } define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x float> %A) { -; CHECK-LABEL: test_v2f32_post_reg_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 def 
$q0 -; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f32_post_reg_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #2 +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: ld1.s { v0 }[1], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14691,11 +14667,11 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 } define <2 x double> @test_v2f64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x double> %A) { -; CHECK-LABEL: test_v2f64_post_imm_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.d { v0 }[1], [x0], #8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_imm_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.d { v0 }[1], [x0], #8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14712,12 +14688,12 @@ define <2 x double> @test_v2f64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x double } define <2 x double> @test_v2f64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x double> %A) { -; CHECK-LABEL: test_v2f64_post_reg_ld1lane: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 -; CHECK-NEXT: ld1.d { v0 }[1], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v2f64_post_reg_ld1lane: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #3 +; SDAG-NEXT: ld1.d { v0 }[1], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: @@ -14744,16 +14720,6 @@ define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(ptr %bar, ptr %p ; CHECK-NEXT: str x8, [x1] ; CHECK-NEXT: mov.s v0[1], v1[0] ; CHECK-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load: -; CHECK-GISEL: ; %bb.0: -; 
CHECK-GISEL-NEXT: ldr s1, [x0] -; CHECK-GISEL-NEXT: str q0, [x3] -; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GISEL-NEXT: ldr q0, [x4] -; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.s v0[1], v1[0] -; CHECK-GISEL-NEXT: ret %tmp1 = load float, ptr %bar store <4 x float> %vec, ptr %dep_ptr_1, align 16 %A = load <4 x float>, ptr %dep_ptr_2, align 16 @@ -14771,19 +14737,19 @@ define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(ptr %bar, ptr %p ; legalizer to run. We achieve that using the ctpop. ; PR23265 define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr, i64 %inc, <4 x i16> %A, ptr %d) { -; CHECK-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ld1.h { v0 }[1], [x0], x8 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ldr d1, [x3] -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: cnt.8b v1, v1 -; CHECK-NEXT: uaddlp.4h v1, v1 -; CHECK-NEXT: uaddlp.2s v1, v1 -; CHECK-NEXT: str d1, [x3] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow: +; SDAG: ; %bb.0: +; SDAG-NEXT: lsl x8, x2, #1 +; SDAG-NEXT: ; kill: def $d0 killed $d0 def $q0 +; SDAG-NEXT: ld1.h { v0 }[1], [x0], x8 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ldr d1, [x3] +; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; SDAG-NEXT: cnt.8b v1, v1 +; SDAG-NEXT: uaddlp.4h v1, v1 +; SDAG-NEXT: uaddlp.2s v1, v1 +; SDAG-NEXT: str d1, [x3] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow: ; CHECK-GISEL: ; %bb.0: @@ -14812,15 +14778,15 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr, declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) define void @test_ld1lane_build(ptr %ptr0, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %out) { -; CHECK-LABEL: test_ld1lane_build: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr s0, [x2] -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: ld1.s { 
v0 }[1], [x3] -; CHECK-NEXT: ld1.s { v1 }[1], [x1] -; CHECK-NEXT: sub.2s v0, v1, v0 -; CHECK-NEXT: str d0, [x4] -; CHECK-NEXT: ret +; SDAG-LABEL: test_ld1lane_build: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr s0, [x2] +; SDAG-NEXT: ldr s1, [x0] +; SDAG-NEXT: ld1.s { v0 }[1], [x3] +; SDAG-NEXT: ld1.s { v1 }[1], [x1] +; SDAG-NEXT: sub.2s v0, v1, v0 +; SDAG-NEXT: str d0, [x4] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_ld1lane_build: ; CHECK-GISEL: ; %bb.0: @@ -14849,15 +14815,15 @@ define void @test_ld1lane_build(ptr %ptr0, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr } define void @test_ld1lane_build_i16(ptr %a, ptr %b, ptr %c, ptr %d, <4 x i16> %e, ptr %p) { -; CHECK-LABEL: test_ld1lane_build_i16: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr h1, [x0] -; CHECK-NEXT: ld1.h { v1 }[1], [x1] -; CHECK-NEXT: ld1.h { v1 }[2], [x2] -; CHECK-NEXT: ld1.h { v1 }[3], [x3] -; CHECK-NEXT: sub.4h v0, v1, v0 -; CHECK-NEXT: str d0, [x4] -; CHECK-NEXT: ret +; SDAG-LABEL: test_ld1lane_build_i16: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr h1, [x0] +; SDAG-NEXT: ld1.h { v1 }[1], [x1] +; SDAG-NEXT: ld1.h { v1 }[2], [x2] +; SDAG-NEXT: ld1.h { v1 }[3], [x3] +; SDAG-NEXT: sub.4h v0, v1, v0 +; SDAG-NEXT: str d0, [x4] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_ld1lane_build_i16: ; CHECK-GISEL: ; %bb.0: @@ -14885,18 +14851,18 @@ define void @test_ld1lane_build_i16(ptr %a, ptr %b, ptr %c, ptr %d, <4 x i16> % } define void @test_ld1lane_build_half(ptr %a, ptr %b, ptr %c, ptr %d, <4 x half> %e, ptr %p) { -; CHECK-LABEL: test_ld1lane_build_half: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr h1, [x0] -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: ld1.h { v1 }[1], [x1] -; CHECK-NEXT: ld1.h { v1 }[2], [x2] -; CHECK-NEXT: ld1.h { v1 }[3], [x3] -; CHECK-NEXT: fcvtl v1.4s, v1.4h -; CHECK-NEXT: fsub.4s v0, v1, v0 -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: str d0, [x4] -; CHECK-NEXT: ret +; SDAG-LABEL: test_ld1lane_build_half: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr h1, [x0] +; SDAG-NEXT: fcvtl v0.4s, v0.4h +; SDAG-NEXT: ld1.h { v1 
}[1], [x1] +; SDAG-NEXT: ld1.h { v1 }[2], [x2] +; SDAG-NEXT: ld1.h { v1 }[3], [x3] +; SDAG-NEXT: fcvtl v1.4s, v1.4h +; SDAG-NEXT: fsub.4s v0, v1, v0 +; SDAG-NEXT: fcvtn v0.4h, v0.4s +; SDAG-NEXT: str d0, [x4] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_ld1lane_build_half: ; CHECK-GISEL: ; %bb.0: @@ -14927,20 +14893,20 @@ define void @test_ld1lane_build_half(ptr %a, ptr %b, ptr %c, ptr %d, <4 x half> } define void @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g, ptr %h, <8 x i8> %v, ptr %p) { -; CHECK-LABEL: test_ld1lane_build_i8: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr b1, [x0] -; CHECK-NEXT: ldr x8, [sp] -; CHECK-NEXT: ld1.b { v1 }[1], [x1] -; CHECK-NEXT: ld1.b { v1 }[2], [x2] -; CHECK-NEXT: ld1.b { v1 }[3], [x3] -; CHECK-NEXT: ld1.b { v1 }[4], [x4] -; CHECK-NEXT: ld1.b { v1 }[5], [x5] -; CHECK-NEXT: ld1.b { v1 }[6], [x6] -; CHECK-NEXT: ld1.b { v1 }[7], [x7] -; CHECK-NEXT: sub.8b v0, v1, v0 -; CHECK-NEXT: str d0, [x8] -; CHECK-NEXT: ret +; SDAG-LABEL: test_ld1lane_build_i8: +; SDAG: ; %bb.0: +; SDAG-NEXT: ldr b1, [x0] +; SDAG-NEXT: ldr x8, [sp] +; SDAG-NEXT: ld1.b { v1 }[1], [x1] +; SDAG-NEXT: ld1.b { v1 }[2], [x2] +; SDAG-NEXT: ld1.b { v1 }[3], [x3] +; SDAG-NEXT: ld1.b { v1 }[4], [x4] +; SDAG-NEXT: ld1.b { v1 }[5], [x5] +; SDAG-NEXT: ld1.b { v1 }[6], [x6] +; SDAG-NEXT: ld1.b { v1 }[7], [x7] +; SDAG-NEXT: sub.8b v0, v1, v0 +; SDAG-NEXT: str d0, [x8] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_ld1lane_build_i8: ; CHECK-GISEL: ; %bb.0: @@ -14985,14 +14951,14 @@ define void @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr } define <4 x i32> @test_inc_cycle(<4 x i32> %vec, ptr %in) { -; CHECK-LABEL: test_inc_cycle: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1.s { v0 }[0], [x0] -; CHECK-NEXT: adrp x9, _var@PAGE -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: add x8, x0, x8, lsl #2 -; CHECK-NEXT: str x8, [x9, _var@PAGEOFF] -; CHECK-NEXT: ret +; SDAG-LABEL: test_inc_cycle: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1.s { v0 }[0], [x0] +; 
SDAG-NEXT: adrp x9, _var@PAGE +; SDAG-NEXT: fmov x8, d0 +; SDAG-NEXT: add x8, x0, x8, lsl #2 +; SDAG-NEXT: str x8, [x9, _var@PAGEOFF] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: test_inc_cycle: ; CHECK-GISEL: ; %bb.0: @@ -15019,18 +14985,18 @@ define <4 x i32> @test_inc_cycle(<4 x i32> %vec, ptr %in) { @var = global ptr null define i8 @load_single_extract_variable_index_i8(ptr %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_i8: -; CHECK: ; %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: bfxil x8, x1, #0, #4 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: ldrb w0, [x8] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; SDAG-LABEL: load_single_extract_variable_index_i8: +; SDAG: ; %bb.0: +; SDAG-NEXT: sub sp, sp, #16 +; SDAG-NEXT: .cfi_def_cfa_offset 16 +; SDAG-NEXT: mov x8, sp +; SDAG-NEXT: ldr q0, [x0] +; SDAG-NEXT: ; kill: def $w1 killed $w1 def $x1 +; SDAG-NEXT: bfxil x8, x1, #0, #4 +; SDAG-NEXT: str q0, [sp] +; SDAG-NEXT: ldrb w0, [x8] +; SDAG-NEXT: add sp, sp, #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: load_single_extract_variable_index_i8: ; CHECK-GISEL: ; %bb.0: @@ -15052,18 +15018,18 @@ define i8 @load_single_extract_variable_index_i8(ptr %A, i32 %idx) { } define i16 @load_single_extract_variable_index_i16(ptr %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_i16: -; CHECK: ; %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: bfi x8, x1, #1, #3 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: ldrh w0, [x8] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; SDAG-LABEL: load_single_extract_variable_index_i16: +; SDAG: ; %bb.0: +; SDAG-NEXT: sub sp, sp, #16 +; SDAG-NEXT: .cfi_def_cfa_offset 16 +; SDAG-NEXT: mov x8, sp +; SDAG-NEXT: ldr q0, [x0] +; 
SDAG-NEXT: ; kill: def $w1 killed $w1 def $x1 +; SDAG-NEXT: bfi x8, x1, #1, #3 +; SDAG-NEXT: str q0, [sp] +; SDAG-NEXT: ldrh w0, [x8] +; SDAG-NEXT: add sp, sp, #16 +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: load_single_extract_variable_index_i16: ; CHECK-GISEL: ; %bb.0: @@ -15083,12 +15049,12 @@ define i16 @load_single_extract_variable_index_i16(ptr %A, i32 %idx) { } define i32 @load_single_extract_variable_index_i32(ptr %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_i32: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: and x8, x1, #0x3 -; CHECK-NEXT: ldr w0, [x0, x8, lsl #2] -; CHECK-NEXT: ret +; SDAG-LABEL: load_single_extract_variable_index_i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: ; kill: def $w1 killed $w1 def $x1 +; SDAG-NEXT: and x8, x1, #0x3 +; SDAG-NEXT: ldr w0, [x0, x8, lsl #2] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: load_single_extract_variable_index_i32: ; CHECK-GISEL: ; %bb.0: @@ -15116,15 +15082,6 @@ define i32 @load_single_extract_variable_index_v3i32_small_align(ptr %A, i32 %id ; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: ldr w0, [x0, x8, lsl #2] ; CHECK-NEXT: ret -; -; CHECK-GISEL-LABEL: load_single_extract_variable_index_v3i32_small_align: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: mov w9, w1 -; CHECK-GISEL-NEXT: mov w8, #2 ; =0x2 -; CHECK-GISEL-NEXT: cmp x9, #2 -; CHECK-GISEL-NEXT: csel x8, x9, x8, lo -; CHECK-GISEL-NEXT: ldr w0, [x0, x8, lsl #2] -; CHECK-GISEL-NEXT: ret %lv = load <3 x i32>, ptr %A, align 2 %e = extractelement <3 x i32> %lv, i32 %idx ret i32 %e @@ -15139,15 +15096,6 @@ define i32 @load_single_extract_variable_index_v3i32_default_align(ptr %A, i32 % ; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: ldr w0, [x0, x8, lsl #2] ; CHECK-NEXT: ret -; -; CHECK-GISEL-LABEL: load_single_extract_variable_index_v3i32_default_align: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: mov w9, w1 -; CHECK-GISEL-NEXT: mov w8, #2 ; =0x2 -; CHECK-GISEL-NEXT: cmp x9, #2 -; CHECK-GISEL-NEXT: csel x8, 
x9, x8, lo -; CHECK-GISEL-NEXT: ldr w0, [x0, x8, lsl #2] -; CHECK-GISEL-NEXT: ret %lv = load <3 x i32>, ptr %A %e = extractelement <3 x i32> %lv, i32 %idx ret i32 %e @@ -15158,22 +15106,17 @@ define i32 @load_single_extract_valid_const_index_v3i32(ptr %A, i32 %idx) { ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr w0, [x0, #8] ; CHECK-NEXT: ret -; -; CHECK-GISEL-LABEL: load_single_extract_valid_const_index_v3i32: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr w0, [x0, #8] -; CHECK-GISEL-NEXT: ret %lv = load <3 x i32>, ptr %A %e = extractelement <3 x i32> %lv, i32 2 ret i32 %e } define i32 @load_single_extract_variable_index_masked_i32(ptr %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_masked_i32: -; CHECK: ; %bb.0: -; CHECK-NEXT: and w8, w1, #0x3 -; CHECK-NEXT: ldr w0, [x0, w8, uxtw #2] -; CHECK-NEXT: ret +; SDAG-LABEL: load_single_extract_variable_index_masked_i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: and w8, w1, #0x3 +; SDAG-NEXT: ldr w0, [x0, w8, uxtw #2] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: load_single_extract_variable_index_masked_i32: ; CHECK-GISEL: ; %bb.0: @@ -15193,11 +15136,11 @@ define i32 @load_single_extract_variable_index_masked_i32(ptr %A, i32 %idx) { } define i32 @load_single_extract_variable_index_masked2_i32(ptr %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_masked2_i32: -; CHECK: ; %bb.0: -; CHECK-NEXT: and w8, w1, #0x1 -; CHECK-NEXT: ldr w0, [x0, w8, uxtw #2] -; CHECK-NEXT: ret +; SDAG-LABEL: load_single_extract_variable_index_masked2_i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: and w8, w1, #0x1 +; SDAG-NEXT: ldr w0, [x0, w8, uxtw #2] +; SDAG-NEXT: ret ; ; CHECK-GISEL-LABEL: load_single_extract_variable_index_masked2_i32: ; CHECK-GISEL: ; %bb.0: From 25d93f3f6843d0e2b8b6c1920a12b59d9dc6bf60 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Fri, 13 Oct 2023 16:51:39 -0700 Subject: [PATCH 113/720] NFC: Precommit GISel checks for arm64-indexed-memory.ll --- .../CodeGen/AArch64/arm64-indexed-memory.ll | 706 ++++++++++++++---- 
1 file changed, 564 insertions(+), 142 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll index d1747e7ca1315..bb18d6d4866ca 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -1,12 +1,25 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-redzone | FileCheck %s --check-prefixes=CHECK,CHECK64 +; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-redzone -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=CHECK,GISEL ; RUN: llc < %s -mtriple=arm64_32-apple-ios -aarch64-redzone | FileCheck %s --check-prefixes=CHECK,CHECK32 define ptr @store64(ptr %ptr, i64 %index, i64 %spacing) { -; CHECK-LABEL: store64: -; CHECK: ; %bb.0: -; CHECK-NEXT: str x2, [x0], #8 -; CHECK-NEXT: ret +; CHECK64-LABEL: store64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str x2, [x0], #8 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: store64: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #8 +; GISEL-NEXT: str x2, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: store64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str x2, [x0], #8 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 1 store i64 %spacing, ptr %ptr, align 4 ret ptr %incdec.ptr @@ -25,20 +38,44 @@ define ptr @store64idxpos256(ptr %ptr, i64 %index, i64 %spacing) { } define ptr @store64idxneg256(ptr %ptr, i64 %index, i64 %spacing) { -; CHECK-LABEL: store64idxneg256: -; CHECK: ; %bb.0: -; CHECK-NEXT: str x2, [x0], #-256 -; CHECK-NEXT: ret +; CHECK64-LABEL: store64idxneg256: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str x2, [x0], #-256 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: store64idxneg256: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: sub x0, x0, #256 +; GISEL-NEXT: str x2, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: store64idxneg256: +; CHECK32: ; %bb.0: +; 
CHECK32-NEXT: str x2, [x0], #-256 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 -32 store i64 %spacing, ptr %ptr, align 4 ret ptr %incdec.ptr } define ptr @store32(ptr %ptr, i32 %index, i32 %spacing) { -; CHECK-LABEL: store32: -; CHECK: ; %bb.0: -; CHECK-NEXT: str w2, [x0], #4 -; CHECK-NEXT: ret +; CHECK64-LABEL: store32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str w2, [x0], #4 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: store32: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #4 +; GISEL-NEXT: str w2, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: store32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str w2, [x0], #4 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 1 store i32 %spacing, ptr %ptr, align 4 ret ptr %incdec.ptr @@ -57,20 +94,44 @@ define ptr @store32idxpos256(ptr %ptr, i32 %index, i32 %spacing) { } define ptr @store32idxneg256(ptr %ptr, i32 %index, i32 %spacing) { -; CHECK-LABEL: store32idxneg256: -; CHECK: ; %bb.0: -; CHECK-NEXT: str w2, [x0], #-256 -; CHECK-NEXT: ret +; CHECK64-LABEL: store32idxneg256: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str w2, [x0], #-256 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: store32idxneg256: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: sub x0, x0, #256 +; GISEL-NEXT: str w2, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: store32idxneg256: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str w2, [x0], #-256 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 -64 store i32 %spacing, ptr %ptr, align 4 ret ptr %incdec.ptr } define ptr @store16(ptr %ptr, i16 %index, i16 %spacing) { -; CHECK-LABEL: store16: -; CHECK: ; %bb.0: -; CHECK-NEXT: strh w2, [x0], #2 -; CHECK-NEXT: ret +; CHECK64-LABEL: store16: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strh w2, [x0], #2 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: store16: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #2 +; GISEL-NEXT: strh w2, [x8] +; GISEL-NEXT: ret +; +; 
CHECK32-LABEL: store16: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strh w2, [x0], #2 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 1 store i16 %spacing, ptr %ptr, align 4 ret ptr %incdec.ptr @@ -89,20 +150,44 @@ define ptr @store16idxpos256(ptr %ptr, i16 %index, i16 %spacing) { } define ptr @store16idxneg256(ptr %ptr, i16 %index, i16 %spacing) { -; CHECK-LABEL: store16idxneg256: -; CHECK: ; %bb.0: -; CHECK-NEXT: strh w2, [x0], #-256 -; CHECK-NEXT: ret +; CHECK64-LABEL: store16idxneg256: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strh w2, [x0], #-256 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: store16idxneg256: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: sub x0, x0, #256 +; GISEL-NEXT: strh w2, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: store16idxneg256: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strh w2, [x0], #-256 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 -128 store i16 %spacing, ptr %ptr, align 4 ret ptr %incdec.ptr } define ptr @store8(ptr %ptr, i8 %index, i8 %spacing) { -; CHECK-LABEL: store8: -; CHECK: ; %bb.0: -; CHECK-NEXT: strb w2, [x0], #1 -; CHECK-NEXT: ret +; CHECK64-LABEL: store8: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strb w2, [x0], #1 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: store8: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #1 +; GISEL-NEXT: strb w2, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: store8: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strb w2, [x0], #1 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 1 store i8 %spacing, ptr %ptr, align 4 ret ptr %incdec.ptr @@ -121,20 +206,44 @@ define ptr @store8idxpos256(ptr %ptr, i8 %index, i8 %spacing) { } define ptr @store8idxneg256(ptr %ptr, i8 %index, i8 %spacing) { -; CHECK-LABEL: store8idxneg256: -; CHECK: ; %bb.0: -; CHECK-NEXT: strb w2, [x0], #-256 -; CHECK-NEXT: ret +; CHECK64-LABEL: store8idxneg256: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strb w2, [x0], #-256 +; CHECK64-NEXT: ret +; 
+; GISEL-LABEL: store8idxneg256: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: sub x0, x0, #256 +; GISEL-NEXT: strb w2, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: store8idxneg256: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strb w2, [x0], #-256 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 -256 store i8 %spacing, ptr %ptr, align 4 ret ptr %incdec.ptr } define ptr @truncst64to32(ptr %ptr, i32 %index, i64 %spacing) { -; CHECK-LABEL: truncst64to32: -; CHECK: ; %bb.0: -; CHECK-NEXT: str w2, [x0], #4 -; CHECK-NEXT: ret +; CHECK64-LABEL: truncst64to32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str w2, [x0], #4 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: truncst64to32: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #4 +; GISEL-NEXT: str w2, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: truncst64to32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str w2, [x0], #4 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 1 %trunc = trunc i64 %spacing to i32 store i32 %trunc, ptr %ptr, align 4 @@ -142,10 +251,22 @@ define ptr @truncst64to32(ptr %ptr, i32 %index, i64 %spacing) { } define ptr @truncst64to16(ptr %ptr, i16 %index, i64 %spacing) { -; CHECK-LABEL: truncst64to16: -; CHECK: ; %bb.0: -; CHECK-NEXT: strh w2, [x0], #2 -; CHECK-NEXT: ret +; CHECK64-LABEL: truncst64to16: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strh w2, [x0], #2 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: truncst64to16: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #2 +; GISEL-NEXT: strh w2, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: truncst64to16: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strh w2, [x0], #2 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 1 %trunc = trunc i64 %spacing to i16 store i16 %trunc, ptr %ptr, align 4 @@ -153,10 +274,22 @@ define ptr @truncst64to16(ptr %ptr, i16 %index, i64 %spacing) { } define ptr @truncst64to8(ptr %ptr, i8 %index, i64 %spacing) { -; CHECK-LABEL: 
truncst64to8: -; CHECK: ; %bb.0: -; CHECK-NEXT: strb w2, [x0], #1 -; CHECK-NEXT: ret +; CHECK64-LABEL: truncst64to8: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strb w2, [x0], #1 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: truncst64to8: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #1 +; GISEL-NEXT: strb w2, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: truncst64to8: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strb w2, [x0], #1 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 1 %trunc = trunc i64 %spacing to i8 store i8 %trunc, ptr %ptr, align 4 @@ -165,30 +298,66 @@ define ptr @truncst64to8(ptr %ptr, i8 %index, i64 %spacing) { define ptr @storef16(ptr %ptr, half %index, half %spacing) nounwind { -; CHECK-LABEL: storef16: -; CHECK: ; %bb.0: -; CHECK-NEXT: str h1, [x0], #2 -; CHECK-NEXT: ret +; CHECK64-LABEL: storef16: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str h1, [x0], #2 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: storef16: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #2 +; GISEL-NEXT: str h1, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: storef16: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str h1, [x0], #2 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds half, ptr %ptr, i64 1 store half %spacing, ptr %ptr, align 2 ret ptr %incdec.ptr } define ptr @storef32(ptr %ptr, float %index, float %spacing) { -; CHECK-LABEL: storef32: -; CHECK: ; %bb.0: -; CHECK-NEXT: str s1, [x0], #4 -; CHECK-NEXT: ret +; CHECK64-LABEL: storef32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str s1, [x0], #4 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: storef32: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #4 +; GISEL-NEXT: str s1, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: storef32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str s1, [x0], #4 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds float, ptr %ptr, i64 1 store float %spacing, ptr %ptr, align 4 ret ptr %incdec.ptr } define ptr @storef64(ptr %ptr, 
double %index, double %spacing) { -; CHECK-LABEL: storef64: -; CHECK: ; %bb.0: -; CHECK-NEXT: str d1, [x0], #8 -; CHECK-NEXT: ret +; CHECK64-LABEL: storef64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str d1, [x0], #8 +; CHECK64-NEXT: ret +; +; GISEL-LABEL: storef64: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #8 +; GISEL-NEXT: str d1, [x8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: storef64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str d1, [x0], #8 +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds double, ptr %ptr, i64 1 store double %spacing, ptr %ptr, align 4 ret ptr %incdec.ptr @@ -196,40 +365,88 @@ define ptr @storef64(ptr %ptr, double %index, double %spacing) { define ptr @pref64(ptr %ptr, double %spacing) { -; CHECK-LABEL: pref64: -; CHECK: ; %bb.0: -; CHECK-NEXT: str d0, [x0, #32]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pref64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str d0, [x0, #32]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pref64: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #32 +; GISEL-NEXT: str d0, [x8, #32] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pref64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str d0, [x0, #32]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds double, ptr %ptr, i64 4 store double %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pref32(ptr %ptr, float %spacing) { -; CHECK-LABEL: pref32: -; CHECK: ; %bb.0: -; CHECK-NEXT: str s0, [x0, #12]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pref32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str s0, [x0, #12]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pref32: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #12 +; GISEL-NEXT: str s0, [x8, #12] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pref32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str s0, [x0, #12]! 
+; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds float, ptr %ptr, i64 3 store float %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pref16(ptr %ptr, half %spacing) nounwind { -; CHECK-LABEL: pref16: -; CHECK: ; %bb.0: -; CHECK-NEXT: str h0, [x0, #6]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pref16: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str h0, [x0, #6]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pref16: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #6 +; GISEL-NEXT: str h0, [x8, #6] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pref16: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str h0, [x0, #6]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds half, ptr %ptr, i64 3 store half %spacing, ptr %incdec.ptr, align 2 ret ptr %incdec.ptr } define ptr @pre64(ptr %ptr, i64 %spacing) { -; CHECK-LABEL: pre64: -; CHECK: ; %bb.0: -; CHECK-NEXT: str x1, [x0, #16]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pre64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str x1, [x0, #16]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pre64: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #16 +; GISEL-NEXT: str x1, [x8, #16] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pre64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str x1, [x0, #16]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 2 store i64 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr @@ -248,20 +465,44 @@ define ptr @pre64idxpos256(ptr %ptr, i64 %spacing) { } define ptr @pre64idxneg256(ptr %ptr, i64 %spacing) { -; CHECK-LABEL: pre64idxneg256: -; CHECK: ; %bb.0: -; CHECK-NEXT: str x1, [x0, #-256]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pre64idxneg256: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str x1, [x0, #-256]! 
+; CHECK64-NEXT: ret +; +; GISEL-LABEL: pre64idxneg256: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: sub x0, x0, #256 +; GISEL-NEXT: stur x1, [x8, #-256] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pre64idxneg256: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str x1, [x0, #-256]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 -32 store i64 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pre32(ptr %ptr, i32 %spacing) { -; CHECK-LABEL: pre32: -; CHECK: ; %bb.0: -; CHECK-NEXT: str w1, [x0, #8]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pre32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str w1, [x0, #8]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pre32: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #8 +; GISEL-NEXT: str w1, [x8, #8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pre32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str w1, [x0, #8]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 2 store i32 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr @@ -280,20 +521,44 @@ define ptr @pre32idxpos256(ptr %ptr, i32 %spacing) { } define ptr @pre32idxneg256(ptr %ptr, i32 %spacing) { -; CHECK-LABEL: pre32idxneg256: -; CHECK: ; %bb.0: -; CHECK-NEXT: str w1, [x0, #-256]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pre32idxneg256: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str w1, [x0, #-256]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pre32idxneg256: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: sub x0, x0, #256 +; GISEL-NEXT: stur w1, [x8, #-256] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pre32idxneg256: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str w1, [x0, #-256]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 -64 store i32 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pre16(ptr %ptr, i16 %spacing) { -; CHECK-LABEL: pre16: -; CHECK: ; %bb.0: -; CHECK-NEXT: strh w1, [x0, #4]! 
-; CHECK-NEXT: ret +; CHECK64-LABEL: pre16: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strh w1, [x0, #4]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pre16: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #4 +; GISEL-NEXT: strh w1, [x8, #4] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pre16: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strh w1, [x0, #4]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 2 store i16 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr @@ -312,20 +577,44 @@ define ptr @pre16idxpos256(ptr %ptr, i16 %spacing) { } define ptr @pre16idxneg256(ptr %ptr, i16 %spacing) { -; CHECK-LABEL: pre16idxneg256: -; CHECK: ; %bb.0: -; CHECK-NEXT: strh w1, [x0, #-256]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pre16idxneg256: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strh w1, [x0, #-256]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pre16idxneg256: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: sub x0, x0, #256 +; GISEL-NEXT: sturh w1, [x8, #-256] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pre16idxneg256: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strh w1, [x0, #-256]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 -128 store i16 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pre8(ptr %ptr, i8 %spacing) { -; CHECK-LABEL: pre8: -; CHECK: ; %bb.0: -; CHECK-NEXT: strb w1, [x0, #2]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pre8: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strb w1, [x0, #2]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pre8: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #2 +; GISEL-NEXT: strb w1, [x8, #2] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pre8: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strb w1, [x0, #2]! 
+; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 2 store i8 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr @@ -344,20 +633,44 @@ define ptr @pre8idxpos256(ptr %ptr, i8 %spacing) { } define ptr @pre8idxneg256(ptr %ptr, i8 %spacing) { -; CHECK-LABEL: pre8idxneg256: -; CHECK: ; %bb.0: -; CHECK-NEXT: strb w1, [x0, #-256]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pre8idxneg256: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strb w1, [x0, #-256]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pre8idxneg256: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: sub x0, x0, #256 +; GISEL-NEXT: sturb w1, [x8, #-256] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pre8idxneg256: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strb w1, [x0, #-256]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 -256 store i8 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pretrunc64to32(ptr %ptr, i64 %spacing) { -; CHECK-LABEL: pretrunc64to32: -; CHECK: ; %bb.0: -; CHECK-NEXT: str w1, [x0, #8]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pretrunc64to32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: str w1, [x0, #8]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pretrunc64to32: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #8 +; GISEL-NEXT: str w1, [x8, #8] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pretrunc64to32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: str w1, [x0, #8]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 2 %trunc = trunc i64 %spacing to i32 store i32 %trunc, ptr %incdec.ptr, align 4 @@ -365,10 +678,22 @@ define ptr @pretrunc64to32(ptr %ptr, i64 %spacing) { } define ptr @pretrunc64to16(ptr %ptr, i64 %spacing) { -; CHECK-LABEL: pretrunc64to16: -; CHECK: ; %bb.0: -; CHECK-NEXT: strh w1, [x0, #4]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pretrunc64to16: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strh w1, [x0, #4]! 
+; CHECK64-NEXT: ret +; +; GISEL-LABEL: pretrunc64to16: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #4 +; GISEL-NEXT: strh w1, [x8, #4] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pretrunc64to16: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strh w1, [x0, #4]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 2 %trunc = trunc i64 %spacing to i16 store i16 %trunc, ptr %incdec.ptr, align 4 @@ -376,10 +701,22 @@ define ptr @pretrunc64to16(ptr %ptr, i64 %spacing) { } define ptr @pretrunc64to8(ptr %ptr, i64 %spacing) { -; CHECK-LABEL: pretrunc64to8: -; CHECK: ; %bb.0: -; CHECK-NEXT: strb w1, [x0, #2]! -; CHECK-NEXT: ret +; CHECK64-LABEL: pretrunc64to8: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: strb w1, [x0, #2]! +; CHECK64-NEXT: ret +; +; GISEL-LABEL: pretrunc64to8: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #2 +; GISEL-NEXT: strb w1, [x8, #2] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: pretrunc64to8: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: strb w1, [x0, #2]! +; CHECK32-NEXT: ret %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 2 %trunc = trunc i64 %spacing to i8 store i8 %trunc, ptr %incdec.ptr, align 4 @@ -414,11 +751,24 @@ define ptr @preidxf32(ptr %src, ptr %out) { } define ptr @preidxf16(ptr %src, ptr %out) { -; CHECK-LABEL: preidxf16: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr h0, [x0, #2]! -; CHECK-NEXT: str h0, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidxf16: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldr h0, [x0, #2]! +; CHECK64-NEXT: str h0, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidxf16: +; GISEL: ; %bb.0: +; GISEL-NEXT: ldr h0, [x0, #2] +; GISEL-NEXT: add x0, x0, #2 +; GISEL-NEXT: str h0, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidxf16: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldr h0, [x0, #2]! 
+; CHECK32-NEXT: str h0, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds half, ptr %src, i64 1 %tmp = load half, ptr %ptr, align 2 store half %tmp, ptr %out, align 2 @@ -502,11 +852,24 @@ define ptr @preidx8zext64(ptr %src, ptr %out) { } define ptr @preidx32sext64(ptr %src, ptr %out) { -; CHECK-LABEL: preidx32sext64: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldrsw x8, [x0, #4]! -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx32sext64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldrsw x8, [x0, #4]! +; CHECK64-NEXT: str x8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx32sext64: +; GISEL: ; %bb.0: +; GISEL-NEXT: ldrsw x8, [x0, #4] +; GISEL-NEXT: add x0, x0, #4 +; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx32sext64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldrsw x8, [x0, #4]! +; CHECK32-NEXT: str x8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i32, ptr %src, i64 1 %tmp = load i32, ptr %ptr, align 4 %ext = sext i32 %tmp to i64 @@ -515,11 +878,24 @@ define ptr @preidx32sext64(ptr %src, ptr %out) { } define ptr @preidx16sext32(ptr %src, ptr %out) { -; CHECK-LABEL: preidx16sext32: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldrsh w8, [x0, #2]! -; CHECK-NEXT: str w8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx16sext32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldrsh w8, [x0, #2]! +; CHECK64-NEXT: str w8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx16sext32: +; GISEL: ; %bb.0: +; GISEL-NEXT: ldrsh w8, [x0, #2] +; GISEL-NEXT: add x0, x0, #2 +; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx16sext32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldrsh w8, [x0, #2]! 
+; CHECK32-NEXT: str w8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i16, ptr %src, i64 1 %tmp = load i16, ptr %ptr, align 4 %ext = sext i16 %tmp to i32 @@ -528,11 +904,24 @@ define ptr @preidx16sext32(ptr %src, ptr %out) { } define ptr @preidx16sext64(ptr %src, ptr %out) { -; CHECK-LABEL: preidx16sext64: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldrsh x8, [x0, #2]! -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx16sext64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldrsh x8, [x0, #2]! +; CHECK64-NEXT: str x8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx16sext64: +; GISEL: ; %bb.0: +; GISEL-NEXT: ldrsh x8, [x0, #2] +; GISEL-NEXT: add x0, x0, #2 +; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx16sext64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldrsh x8, [x0, #2]! +; CHECK32-NEXT: str x8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i16, ptr %src, i64 1 %tmp = load i16, ptr %ptr, align 4 %ext = sext i16 %tmp to i64 @@ -541,11 +930,24 @@ define ptr @preidx16sext64(ptr %src, ptr %out) { } define ptr @preidx8sext32(ptr %src, ptr %out) { -; CHECK-LABEL: preidx8sext32: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldrsb w8, [x0, #1]! -; CHECK-NEXT: str w8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx8sext32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldrsb w8, [x0, #1]! +; CHECK64-NEXT: str w8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx8sext32: +; GISEL: ; %bb.0: +; GISEL-NEXT: ldrsb w8, [x0, #1] +; GISEL-NEXT: add x0, x0, #1 +; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx8sext32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldrsb w8, [x0, #1]! 
+; CHECK32-NEXT: str w8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i8, ptr %src, i64 1 %tmp = load i8, ptr %ptr, align 4 %ext = sext i8 %tmp to i32 @@ -554,11 +956,24 @@ define ptr @preidx8sext32(ptr %src, ptr %out) { } define ptr @preidx8sext64(ptr %src, ptr %out) { -; CHECK-LABEL: preidx8sext64: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldrsb x8, [x0, #1]! -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx8sext64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldrsb x8, [x0, #1]! +; CHECK64-NEXT: str x8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx8sext64: +; GISEL: ; %bb.0: +; GISEL-NEXT: ldrsb x8, [x0, #1] +; GISEL-NEXT: add x0, x0, #1 +; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx8sext64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldrsb x8, [x0, #1]! +; CHECK32-NEXT: str x8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i8, ptr %src, i64 1 %tmp = load i8, ptr %ptr, align 4 %ext = sext i8 %tmp to i64 @@ -576,6 +991,13 @@ define ptr @postidx_clobber(ptr %addr) nounwind noinline ssp { ; CHECK64-NEXT: mov x0, x8 ; CHECK64-NEXT: ret ; +; GISEL-LABEL: postidx_clobber: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov x8, x0 +; GISEL-NEXT: add x0, x0, #8 +; GISEL-NEXT: str x8, [x8] +; GISEL-NEXT: ret +; ; CHECK32-LABEL: postidx_clobber: ; CHECK32: ; %bb.0: ; CHECK32-NEXT: mov x8, x0 From f2b79ed9c6c858426b15a0374103ab901b5b2ef3 Mon Sep 17 00:00:00 2001 From: Anton Rydahl <44206479+AntonRydahl@users.noreply.github.com> Date: Fri, 13 Oct 2023 17:08:15 -0700 Subject: [PATCH 114/720] [libcxx] Refactoring SIMD function names in PSTL CPU backend (#69029) This PR addresses a smaller detail discussed in the code review for https://github.com/llvm/llvm-project/pull/66968. Currently, some functions in the `libc++` PSTL CPU backend have been appended with a digit to indicate the number of input iterator arguments. However, there is no need to change the name for each version as overloading can be used instead. 
This PR will make the naming more consistent in the the CPU and the proposed OpenMP backend. --- .../__algorithm/pstl_backends/cpu_backends/for_each.h | 4 ++-- .../__algorithm/pstl_backends/cpu_backends/transform.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h index 6cfef932aa48d..81fd4526b8dbf 100644 --- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h @@ -26,7 +26,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_HIDE_FROM_ABI _Iterator __simd_walk_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept { +_LIBCPP_HIDE_FROM_ABI _Iterator __simd_walk(_Iterator __first, _DifferenceType __n, _Function __f) noexcept { _PSTL_PRAGMA_SIMD for (_DifferenceType __i = 0; __i < __n; ++__i) __f(__first[__i]); @@ -47,7 +47,7 @@ __pstl_for_each(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __ }); } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { - std::__simd_walk_1(__first, __last - __first, __func); + std::__simd_walk(__first, __last - __first, __func); return __empty{}; } else { std::for_each(__first, __last, __func); diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform.h index 2c7647d61a2b0..fdf1a2e78dad9 100644 --- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform.h +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform.h @@ -32,7 +32,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template _LIBCPP_HIDE_FROM_ABI _Iterator2 -__simd_walk_2(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Function __f) noexcept { +__simd_walk(_Iterator1 __first1, _DifferenceType __n, _Iterator2 
__first2, _Function __f) noexcept { _PSTL_PRAGMA_SIMD for (_DifferenceType __i = 0; __i < __n; ++__i) __f(__first1[__i], __first2[__i]); @@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_transform( } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && __has_random_access_iterator_category_or_concept<_ForwardIterator>::value && __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) { - return std::__simd_walk_2( + return std::__simd_walk( __first, __last - __first, __result, @@ -73,7 +73,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_transform( } template -_LIBCPP_HIDE_FROM_ABI _Iterator3 __simd_walk_3( +_LIBCPP_HIDE_FROM_ABI _Iterator3 __simd_walk( _Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Iterator3 __first3, _Function __f) noexcept { _PSTL_PRAGMA_SIMD for (_DifferenceType __i = 0; __i < __n; ++__i) @@ -116,7 +116,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_transform( __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value && __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) { - return std::__simd_walk_3( + return std::__simd_walk( __first1, __last1 - __first1, __first2, From 70fedaf89b35c38f4f32fb50d1321e6d473801ab Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 13 Oct 2023 17:27:44 -0700 Subject: [PATCH 115/720] [libc++][NFC] Fix slightly incorrect comment in PSTL documentation --- libcxx/include/__algorithm/pstl_backend.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libcxx/include/__algorithm/pstl_backend.h b/libcxx/include/__algorithm/pstl_backend.h index 94644e5c47b39..0bf2cca5eef48 100644 --- a/libcxx/include/__algorithm/pstl_backend.h +++ b/libcxx/include/__algorithm/pstl_backend.h @@ -41,13 +41,13 @@ A PSTL parallel backend is a tag type to which the following 
functions are assoc class _ForwardIterator2, class _ForwardOutIterator, class _Comp> - _ForwardOutIterator __pstl_merge(_Backend, - _ForwardIterator1 __first1, - _ForwardIterator1 __last1, - _ForwardIterator2 __first2, - _ForwardIterator2 __last2, - _ForwardOutIterator __result, - _Comp __comp); + optional<_ForwardOutIterator> __pstl_merge(_Backend, + _ForwardIterator1 __first1, + _ForwardIterator1 __last1, + _ForwardIterator2 __first2, + _ForwardIterator2 __last2, + _ForwardOutIterator __result, + _Comp __comp); template optional<_OutIterator> From f3cfd3812b4a721fcf1be0e242a31d547c908459 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 13 Oct 2023 18:22:42 -0700 Subject: [PATCH 116/720] [AST] Stop including llvm/ADT/StringMap.h (NFC) The last use of StringMap was removed by: commit 20157410862d376c624cc24bffd9730290a16142 Author: Vince Bridgers Date: Thu Jul 16 12:55:32 2020 -0500 --- clang/unittests/AST/ASTImporterTest.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 393ed44de3f18..325c585e0e116 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -12,7 +12,6 @@ #include "clang/AST/RecordLayout.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "llvm/ADT/StringMap.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" #include "clang/AST/DeclContextInternals.h" @@ -26,7 +25,6 @@ namespace ast_matchers { using internal::Matcher; using internal::BindableMatcher; -using llvm::StringMap; static const RecordDecl *getRecordDeclOfFriend(FriendDecl *FD) { QualType Ty = FD->getFriendType()->getType().getCanonicalType(); From eab5d337f0f62828a991ad7ed7e4257735c48e11 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 13 Oct 2023 18:22:44 -0700 Subject: [PATCH 117/720] [BOLT] Use llvm::erase_if (NFC) --- bolt/lib/Core/HashUtilities.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/bolt/lib/Core/HashUtilities.cpp b/bolt/lib/Core/HashUtilities.cpp index 88f01e4f936d3..6c7570dcc44e8 100644 --- a/bolt/lib/Core/HashUtilities.cpp +++ b/bolt/lib/Core/HashUtilities.cpp @@ -155,10 +155,7 @@ std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) { } std::string Mnemonic = BC.InstPrinter->getMnemonic(&Inst).first; - Mnemonic.erase( - std::remove_if(Mnemonic.begin(), Mnemonic.end(), - [](unsigned char ch) { return std::isspace(ch); }), - Mnemonic.end()); + llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); }); Opcodes.insert(Mnemonic); } From 18dc8dcd768fd99f29d21d3fa1603d299c686da1 Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Sat, 14 Oct 2023 10:57:03 +0800 Subject: [PATCH 118/720] [PowerPC][JITLink] Support R_PPC64_GOT_TLSGD_PCREL34 (#68660) `R_PPC64_GOT_TLSGD_PCREL34` is generated for pwr10+. --- .../Linux/ppc64/Inputs/trivial-tls-main.cpp | 30 +++++++++++++++++++ .../Linux/ppc64/Inputs/trivial-tls-pwr10.cpp | 5 ++++ .../Linux/ppc64/trivial-tls-pwr10.test | 9 ++++++ .../llvm/ExecutionEngine/JITLink/ppc64.h | 5 ++++ .../lib/ExecutionEngine/JITLink/ELF_ppc64.cpp | 21 ++++++++++--- llvm/lib/ExecutionEngine/JITLink/ppc64.cpp | 2 ++ .../JITLink/ppc64/ELF_ppc64_relocations.s | 19 ++++++++++++ 7 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 compiler-rt/test/orc/TestCases/Linux/ppc64/Inputs/trivial-tls-main.cpp create mode 100644 compiler-rt/test/orc/TestCases/Linux/ppc64/Inputs/trivial-tls-pwr10.cpp create mode 100644 compiler-rt/test/orc/TestCases/Linux/ppc64/trivial-tls-pwr10.test diff --git a/compiler-rt/test/orc/TestCases/Linux/ppc64/Inputs/trivial-tls-main.cpp b/compiler-rt/test/orc/TestCases/Linux/ppc64/Inputs/trivial-tls-main.cpp new file mode 100644 index 0000000000000..d6757fdd4154c --- /dev/null +++ b/compiler-rt/test/orc/TestCases/Linux/ppc64/Inputs/trivial-tls-main.cpp @@ -0,0 +1,30 @@ +#include +#include +#include + +thread_local int x = 0; +thread_local int y = 1; +thread_local 
int z = -1; + +extern int TestPOWER10(); + +int Test() { return x + y + z; } + +static bool CPUModelIsPOWER10() { + std::string line; + std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in); + if (!cpuinfo.is_open()) + return false; + while (std::getline(cpuinfo, line)) { + if (line.find("cpu") != std::string::npos && + line.find("POWER10") != std::string::npos) + return true; + } + return false; +} + +int main() { + if (CPUModelIsPOWER10()) + return TestPOWER10(); + return Test(); +} diff --git a/compiler-rt/test/orc/TestCases/Linux/ppc64/Inputs/trivial-tls-pwr10.cpp b/compiler-rt/test/orc/TestCases/Linux/ppc64/Inputs/trivial-tls-pwr10.cpp new file mode 100644 index 0000000000000..a6fb3088af629 --- /dev/null +++ b/compiler-rt/test/orc/TestCases/Linux/ppc64/Inputs/trivial-tls-pwr10.cpp @@ -0,0 +1,5 @@ +extern thread_local int x; +extern thread_local int y; +extern thread_local int z; + +int __attribute__((target("arch=pwr10"))) TestPOWER10() { return x + y + z; } diff --git a/compiler-rt/test/orc/TestCases/Linux/ppc64/trivial-tls-pwr10.test b/compiler-rt/test/orc/TestCases/Linux/ppc64/trivial-tls-pwr10.test new file mode 100644 index 0000000000000..93561b1645c33 --- /dev/null +++ b/compiler-rt/test/orc/TestCases/Linux/ppc64/trivial-tls-pwr10.test @@ -0,0 +1,9 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clangxx -fPIC -c -o %t/main.o %S/Inputs/trivial-tls-main.cpp +// RUN: %clangxx -fPIC -c -o %t/pwr10.o %S/Inputs/trivial-tls-pwr10.cpp +// RUN: %llvm_jitlink %t/main.o %t/pwr10.o +// FIXME: We seperate pwr10 code from main object file due to currrent +// implementation only supports one PLT stub for the same symbol. +// For example, `bl __tls_get_addr` in one object file has only one PLT stub, +// however we need another different PLT stub for `bl __tls_get_addr@notoc` +// whose target symbol is also `__tls_get_addr`. 
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h index ff932f6022bdc..b723914455986 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h @@ -61,6 +61,7 @@ enum EdgeKind_ppc64 : Edge::Kind { RequestCallNoTOC, RequestTLSDescInGOTAndTransformToTOCDelta16HA, RequestTLSDescInGOTAndTransformToTOCDelta16LO, + RequestTLSDescInGOTAndTransformToDelta34, }; enum PLTCallStubKind { @@ -202,6 +203,10 @@ class PLTTableManager : public TableManager> { static StringRef getSectionName() { return "$__STUBS"; } + // FIXME: One external symbol can only have one PLT stub in a object file. + // This is a limitation when we need different PLT stubs for the same symbol. + // For example, we need two different PLT stubs for `bl __tls_get_addr` and + // `bl __tls_get_addr@notoc`. bool visitEdge(LinkGraph &G, Block *B, Edge &E) { bool isExternal = E.getTarget().isExternal(); Edge::Kind K = E.getKind(); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp index bf1d22ac9a430..25b1dd9d3d125 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp @@ -43,17 +43,22 @@ class TLSInfoTableManager_ELF_ppc64 bool visitEdge(LinkGraph &G, Block *B, Edge &E) { Edge::Kind K = E.getKind(); - if (K == ppc64::RequestTLSDescInGOTAndTransformToTOCDelta16HA) { + switch (K) { + case ppc64::RequestTLSDescInGOTAndTransformToTOCDelta16HA: E.setKind(ppc64::TOCDelta16HA); E.setTarget(this->getEntryForTarget(G, E.getTarget())); return true; - } - if (K == ppc64::RequestTLSDescInGOTAndTransformToTOCDelta16LO) { + case ppc64::RequestTLSDescInGOTAndTransformToTOCDelta16LO: E.setKind(ppc64::TOCDelta16LO); E.setTarget(this->getEntryForTarget(G, E.getTarget())); return true; + case ppc64::RequestTLSDescInGOTAndTransformToDelta34: + E.setKind(ppc64::Delta34); + 
E.setTarget(this->getEntryForTarget(G, E.getTarget())); + return true; + default: + return false; } - return false; } Symbol &createEntry(LinkGraph &G, Symbol &Target) { @@ -234,10 +239,15 @@ class ELFLinkGraphBuilder_ppc64 if (ELFReloc == ELF::R_PPC64_TLSLD) return make_error("Local-dynamic TLS model is not supported", inconvertibleErrorCode()); + if (ELFReloc == ELF::R_PPC64_PCREL_OPT) // TODO: Support PCREL optimization, now ignore it. return Error::success(); + if (ELFReloc == ELF::R_PPC64_TPREL34) + return make_error("Local-exec TLS model is not supported", + inconvertibleErrorCode()); + auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec); if (!ObjSymbol) return ObjSymbol.takeError(); @@ -372,6 +382,9 @@ class ELFLinkGraphBuilder_ppc64 case ELF::R_PPC64_GOT_TLSGD16_LO: Kind = ppc64::RequestTLSDescInGOTAndTransformToTOCDelta16LO; break; + case ELF::R_PPC64_GOT_TLSGD_PCREL34: + Kind = ppc64::RequestTLSDescInGOTAndTransformToDelta34; + break; } Edge GE(Kind, Offset, *GraphSymbol, Addend); diff --git a/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp b/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp index ac4a62a503919..27484aaf20590 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp @@ -134,6 +134,8 @@ const char *getEdgeKindName(Edge::Kind K) { return "RequestTLSDescInGOTAndTransformToTOCDelta16HA"; case RequestTLSDescInGOTAndTransformToTOCDelta16LO: return "RequestTLSDescInGOTAndTransformToTOCDelta16LO"; + case RequestTLSDescInGOTAndTransformToDelta34: + return "RequestTLSDescInGOTAndTransformToDelta34"; default: return getGenericEdgeKindName(static_cast(K)); } diff --git a/llvm/test/ExecutionEngine/JITLink/ppc64/ELF_ppc64_relocations.s b/llvm/test/ExecutionEngine/JITLink/ppc64/ELF_ppc64_relocations.s index bcee29d1d34f6..8f28a8662cbd6 100644 --- a/llvm/test/ExecutionEngine/JITLink/ppc64/ELF_ppc64_relocations.s +++ b/llvm/test/ExecutionEngine/JITLink/ppc64/ELF_ppc64_relocations.s @@ -1,3 +1,4 @@ +# 
REQUIRES: system-linux # RUN: rm -rf %t && mkdir -p %t # RUN: llvm-mc --triple=powerpc64le-unknown-linux-gnu --filetype=obj -o \ # RUN: %t/elf_reloc.o --defsym LE=1 %s @@ -9,6 +10,7 @@ # RUN: --abs external_addr16_data=0x6000 \ # RUN: --abs external_addr32_data=0x36668840 \ # RUN: --abs pcrel_external_var=0x36668860 \ +# RUN: --abs pcrel_external_tls=0x36668880 \ # RUN: --check %s %t/elf_reloc.o # RUN: llvm-mc --triple=powerpc64-unknown-linux-gnu --filetype=obj -o \ # RUN: %t/elf_reloc.o %s @@ -20,6 +22,7 @@ # RUN: --abs external_addr16_data=0x6000 \ # RUN: --abs external_addr32_data=0x36668840 \ # RUN: --abs pcrel_external_var=0x36668860 \ +# RUN: --abs pcrel_external_tls=0x36668880 \ # RUN: --check %s %t/elf_reloc.o # jitlink-check: section_addr(elf_reloc.o, $__GOT) + 0x8000 = __TOC__ @@ -255,6 +258,22 @@ reloc_got_pcrel34: blr .size reloc_got_pcrel34,.-reloc_got_pcrel34 + .global reloc_tlsgd_pcrel34 + .p2align 4 + .type reloc_tlsgd_pcrel34,@function +reloc_tlsgd_pcrel34: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + paddi 3, 0, pcrel_external_tls@got@tlsgd@pcrel, 1 + bl __tls_get_addr@notoc(a@tlsgd) + lwa 3, 0(3) + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr + .size reloc_tlsgd_pcrel34,.-reloc_tlsgd_pcrel34 + .type .L.str,@object .section .rodata.str1.1,"aMS",@progbits,1 .L.str: From 3743c53dd19fd2f935dfd4dec17ca1b1f7911ddb Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 13 Oct 2023 20:09:31 -0700 Subject: [PATCH 119/720] [clang] Remove unused using decls (NFC) Identified with misc-unused-using-decls. 
--- clang/unittests/AST/ASTImporterTest.cpp | 1 - .../Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp | 1 - clang/unittests/Format/FormatTestRawStrings.cpp | 3 --- clang/unittests/Tooling/RangeSelectorTest.cpp | 1 - 4 files changed, 6 deletions(-) diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 325c585e0e116..1dc314eafc4ef 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -24,7 +24,6 @@ namespace clang { namespace ast_matchers { using internal::Matcher; -using internal::BindableMatcher; static const RecordDecl *getRecordDeclOfFriend(FriendDecl *FD) { QualType Ty = FD->getFriendType()->getType().getCanonicalType(); diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index 2425bb8711bdb..edd87b798198b 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -47,7 +47,6 @@ using namespace test; using namespace ast_matchers; using llvm::IsStringMapEntry; using ::testing::DescribeMatcher; -using ::testing::ElementsAre; using ::testing::IsEmpty; using ::testing::NotNull; using ::testing::Test; diff --git a/clang/unittests/Format/FormatTestRawStrings.cpp b/clang/unittests/Format/FormatTestRawStrings.cpp index 6f9a0d650ba2d..10f341cc8f799 100644 --- a/clang/unittests/Format/FormatTestRawStrings.cpp +++ b/clang/unittests/Format/FormatTestRawStrings.cpp @@ -17,9 +17,6 @@ #define DEBUG_TYPE "format-test" -using clang::tooling::ReplacementTest; -using clang::tooling::toReplacements; - namespace clang { namespace format { namespace { diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp index cbb8e25bb92f2..03ab66235e43c 100644 --- a/clang/unittests/Tooling/RangeSelectorTest.cpp +++ 
b/clang/unittests/Tooling/RangeSelectorTest.cpp @@ -28,7 +28,6 @@ using ::llvm::HasValue; using ::llvm::StringError; using ::testing::AllOf; using ::testing::HasSubstr; -using ::testing::Property; using MatchResult = MatchFinder::MatchResult; From 6e8013a1301ef31f3592035eae2ee08319edd318 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 13 Oct 2023 20:09:33 -0700 Subject: [PATCH 120/720] [llvm] Stop including llvm/ADT/StringMap.h (NFC) These source files do not use StringMap. --- llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 1 - llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 1 - llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h | 1 - llvm/lib/Object/ModuleSymbolTable.cpp | 1 - llvm/lib/Transforms/IPO/FunctionImport.cpp | 1 - 5 files changed, 5 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index 8f936037d1325..88d5487427774 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -13,7 +13,6 @@ #include "llvm/CodeGen/AccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index b2e570c5e67ec..78d7e62797ce5 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -13,7 +13,6 @@ #include "llvm/CodeGen/MIRParser/MIRParser.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h index 3adaa447d9cb8..e5f3ce8c53f5e 100644 --- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h +++ 
b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h @@ -14,7 +14,6 @@ #define LIB_EXECUTIONENGINE_JITLINK_COFFLINKGRAPHBUILDER_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ExecutionEngine/JITLink/JITLink.h" #include "llvm/Object/COFF.h" diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp index 0290a819e5de5..dc73937863e6d 100644 --- a/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/llvm/lib/Object/ModuleSymbolTable.cpp @@ -15,7 +15,6 @@ #include "llvm/Object/ModuleSymbolTable.h" #include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index 80c360b8dd0f7..3c07101d87e9d 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/AutoUpgrade.h" From 3750558ee1b0b1cb2242de9dee54c788dcfab9c4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 13 Oct 2023 20:34:45 -0700 Subject: [PATCH 121/720] [RISCV][GISel] Legalize G_SMULO/G_UMULO (#67635) Update `LegalizerHelper::widenScalarMulo` to not create a mulo if we aren't going to use the overflow flag. This prevents needing to legalize the widened operation. This generates better code when we need to make a libcall for multiply. 
--- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 20 +- .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 18 + .../legalizer/rv32/legalize-mulo.mir | 348 ++++++++++++++ .../legalizer/rv64/legalize-mulo.mir | 450 ++++++++++++++++++ 4 files changed, 831 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mulo.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mulo.mir diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 196da03733c7d..108768494ccbb 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2141,8 +2141,20 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx, auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS}); auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS}); - auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy}, - {LeftOperand, RightOperand}); + // Multiplication cannot overflow if the WideTy is >= 2 * original width, + // so we don't need to check the overflow result of larger type Mulo. + bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth; + + unsigned MulOpc = + WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL; + + MachineInstrBuilder Mulo; + if (WideMulCanOverflow) + Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy}, + {LeftOperand, RightOperand}); + else + Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand}); + auto Mul = Mulo->getOperand(0); MIRBuilder.buildTrunc(Result, Mul); @@ -2160,9 +2172,7 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx, ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth); } - // Multiplication cannot overflow if the WideTy is >= 2 * original width, - // so we don't need to check the overflow result of larger type Mulo. 
- if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) { + if (WideMulCanOverflow) { auto Overflow = MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult); // Finally check if the multiplication in the larger type itself overflowed. diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 4479bccfd45e3..3ec3359884883 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -145,6 +145,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) { .legalFor({XLenLLT}) .lower(); // clang-format on + + getActionDefinitionsBuilder({G_SMULO, G_UMULO}) + .minScalar(0, XLenLLT) + .lower(); } else { getActionDefinitionsBuilder(G_MUL) .libcallFor({XLenLLT, DoubleXLenLLT}) @@ -152,6 +156,20 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) { .clampScalar(0, XLenLLT, DoubleXLenLLT); getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({XLenLLT}); + + getActionDefinitionsBuilder({G_SMULO, G_UMULO}) + .minScalar(0, XLenLLT) + // Widen XLenLLT to DoubleXLenLLT so we can use a single libcall to get + // the low bits for the mul result and high bits to do the overflow + // check. 
+ .widenScalarIf( + [=](const LegalityQuery &Query) { + return Query.Types[0] == XLenLLT; + }, + [=](const LegalityQuery &Query) { + return std::make_pair(0, DoubleXLenLLT); + }) + .lower(); } if (ST.hasStdExtM()) { diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mulo.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mulo.mir new file mode 100644 index 0000000000000..43fd1f99fdcaf --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mulo.mir @@ -0,0 +1,348 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mattr=+m -mtriple=riscv32 -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s +# RUN: llc -mattr=+zmmul -mtriple=riscv32 -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s +# RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=LIBCALL + +--- +name: smulo_i8 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: smulo_i8 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C2]](s32) + ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[MUL]](s32), [[ASHR2]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s32) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s32) + ; 
CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: smulo_i8 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; LIBCALL-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; LIBCALL-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; LIBCALL-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; LIBCALL-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32) + ; LIBCALL-NEXT: $x10 = COPY [[ASHR]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[ASHR1]](s32) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; LIBCALL-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; LIBCALL-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C2]](s32) + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY2]](s32), [[ASHR2]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s32) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %2:_(s32) = COPY $x10 + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $x11 + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8), %5:_(s1) = G_SMULO %0, %1 + %6:_(s32) = G_ANYEXT %4(s8) + %7:_(s32) = G_ANYEXT %5(s1) + $x10 = COPY %6(s32) + $x11 = COPY %7(s32) + PseudoRET implicit $x10, implicit $x11 + +... 
+--- +name: smulo_i16 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: smulo_i16 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C2]](s32) + ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[MUL]](s32), [[ASHR2]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s32) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: smulo_i16 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; LIBCALL-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; LIBCALL-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; LIBCALL-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; LIBCALL-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32) + ; LIBCALL-NEXT: $x10 = COPY [[ASHR]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[ASHR1]](s32) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LIBCALL-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; LIBCALL-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; LIBCALL-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C2]](s32) + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY2]](s32), [[ASHR2]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s32) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %2:_(s32) = COPY $x10 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $x11 + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16), %5:_(s1) = G_SMULO %0, %1 + %6:_(s32) = G_ANYEXT %4(s16) + %7:_(s32) = G_ANYEXT %5(s1) + $x10 = COPY %6(s32) + $x11 = COPY %7(s32) + PseudoRET implicit $x10, implicit $x11 + +... +--- +name: smulo_i32 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: smulo_i32 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s32), [[ASHR]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s32) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: smulo_i32 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; LIBCALL-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; LIBCALL-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C1]](s32) + ; 
LIBCALL-NEXT: $x10 = COPY [[COPY]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[ASHR]](s32) + ; LIBCALL-NEXT: $x12 = COPY [[COPY1]](s32) + ; LIBCALL-NEXT: $x13 = COPY [[ASHR1]](s32) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit-def $x10, implicit-def $x11 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x11 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; LIBCALL-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; LIBCALL-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C2]](s32) + ; LIBCALL-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; LIBCALL-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[C3]](s32) + ; LIBCALL-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; LIBCALL-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[ASHR2]] + ; LIBCALL-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY3]], [[ASHR3]] + ; LIBCALL-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]] + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s32), [[C4]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s32) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s32), %3:_(s1) = G_SMULO %0, %1 + %4:_(s32) = G_ANYEXT %3(s1) + $x10 = COPY %2(s32) + $x11 = COPY %4(s32) + PseudoRET implicit $x10, implicit $x11 + +... 
+--- +name: umulo_i8 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: umulo_i8 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s32) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: umulo_i8 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; LIBCALL-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; LIBCALL-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; LIBCALL-NEXT: $x10 = COPY [[AND]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[AND1]](s32) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; LIBCALL-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY2]](s32), [[AND2]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s32) + ; LIBCALL-NEXT: PseudoRET implicit $x10, 
implicit $x11 + %2:_(s32) = COPY $x10 + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $x11 + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8), %5:_(s1) = G_UMULO %0, %1 + %6:_(s32) = G_ANYEXT %4(s8) + %7:_(s32) = G_ANYEXT %5(s1) + $x10 = COPY %6(s32) + $x11 = COPY %7(s32) + PseudoRET implicit $x10, implicit $x11 + +... +--- +name: umulo_i16 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: umulo_i16 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s32) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: umulo_i16 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; LIBCALL-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; LIBCALL-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; LIBCALL-NEXT: $x10 = COPY [[AND]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[AND1]](s32) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; LIBCALL-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY2]](s32), [[AND2]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s32) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %2:_(s32) = COPY $x10 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $x11 + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16), %5:_(s1) = G_UMULO %0, %1 + %6:_(s32) = G_ANYEXT %4(s16) + %7:_(s32) = G_ANYEXT %5(s1) + $x10 = COPY %6(s32) + $x11 = COPY %7(s32) + PseudoRET implicit $x10, implicit $x11 + +... +--- +name: umulo_i32 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: umulo_i32 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s32) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: umulo_i32 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; LIBCALL-NEXT: $x10 = COPY [[COPY]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[C]](s32) + ; LIBCALL-NEXT: $x12 = COPY [[COPY1]](s32) + ; LIBCALL-NEXT: $x13 = COPY [[C1]](s32) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit-def $x10, implicit-def $x11 + ; 
LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x10 + ; LIBCALL-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x11 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; LIBCALL-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; LIBCALL-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; LIBCALL-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; LIBCALL-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; LIBCALL-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[AND]] + ; LIBCALL-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY3]], [[AND1]] + ; LIBCALL-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]] + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s32), [[C4]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s32) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s32) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s32), %3:_(s1) = G_UMULO %0, %1 + %4:_(s32) = G_ANYEXT %3(s1) + $x10 = COPY %2(s32) + $x11 = COPY %4(s32) + PseudoRET implicit $x10, implicit $x11 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mulo.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mulo.mir new file mode 100644 index 0000000000000..7e1ec1e0961d7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mulo.mir @@ -0,0 +1,450 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mattr=+m -mtriple=riscv64 -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s +# RUN: llc -mattr=+zmmul -mtriple=riscv64 -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s +# RUN: llc -mtriple=riscv64 -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=LIBCALL + +--- +name: smulo_i8 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: smulo_i8 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[C1]](s64) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MUL]], [[C2]](s64) + ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SHL2]], [[C2]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[MUL]](s64), [[ASHR2]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s64) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: smulo_i8 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: 
[[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; LIBCALL-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; LIBCALL-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; LIBCALL-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64) + ; LIBCALL-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[C1]](s64) + ; LIBCALL-NEXT: $x10 = COPY [[ASHR]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ASHR1]](s64) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; LIBCALL-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[COPY2]], [[C2]](s64) + ; LIBCALL-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SHL2]], [[C2]](s64) + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY2]](s64), [[ASHR2]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s64) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %2:_(s64) = COPY $x10 + %0:_(s8) = G_TRUNC %2(s64) + %3:_(s64) = COPY $x11 + %1:_(s8) = G_TRUNC %3(s64) + %4:_(s8), %5:_(s1) = G_SMULO %0, %1 + %6:_(s64) = G_ANYEXT %4(s8) + %7:_(s64) = G_ANYEXT %5(s1) + $x10 = COPY %6(s64) + $x11 = COPY %7(s64) + PseudoRET implicit $x10, implicit $x11 + +... 
+--- +name: smulo_i16 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: smulo_i16 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[C1]](s64) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MUL]], [[C2]](s64) + ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SHL2]], [[C2]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[MUL]](s64), [[ASHR2]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s64) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: smulo_i16 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; LIBCALL-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; LIBCALL-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; LIBCALL-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64) + ; LIBCALL-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[C1]](s64) + ; LIBCALL-NEXT: $x10 = COPY [[ASHR]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ASHR1]](s64) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LIBCALL-NEXT: 
[[COPY2:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; LIBCALL-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[COPY2]], [[C2]](s64) + ; LIBCALL-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SHL2]], [[C2]](s64) + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY2]](s64), [[ASHR2]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s64) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %2:_(s64) = COPY $x10 + %0:_(s16) = G_TRUNC %2(s64) + %3:_(s64) = COPY $x11 + %1:_(s16) = G_TRUNC %3(s64) + %4:_(s16), %5:_(s1) = G_SMULO %0, %1 + %6:_(s64) = G_ANYEXT %4(s16) + %7:_(s64) = G_ANYEXT %5(s1) + $x10 = COPY %6(s64) + $x11 = COPY %7(s64) + PseudoRET implicit $x10, implicit $x11 + +... +--- +name: smulo_i32 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: smulo_i32 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MUL]], 32 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[MUL]](s64), [[SEXT_INREG2]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s64) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: smulo_i32 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LIBCALL-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; LIBCALL-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32 + ; LIBCALL-NEXT: $x10 = COPY [[SEXT_INREG]](s64) + ; LIBCALL-NEXT: 
$x11 = COPY [[SEXT_INREG1]](s64) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 32 + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY2]](s64), [[SEXT_INREG2]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s64) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %2:_(s64) = COPY $x10 + %0:_(s32) = G_TRUNC %2(s64) + %3:_(s64) = COPY $x11 + %1:_(s32) = G_TRUNC %3(s64) + %4:_(s32), %5:_(s1) = G_SMULO %0, %1 + %6:_(s64) = G_ANYEXT %4(s32) + %7:_(s64) = G_ANYEXT %5(s1) + $x10 = COPY %6(s64) + $x11 = COPY %7(s64) + PseudoRET implicit $x10, implicit $x11 + +... +--- +name: smulo_i64 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: smulo_i64 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MUL]], [[C]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[SMULH]](s64), [[ASHR]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s64) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: smulo_i64 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; LIBCALL-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64) + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; LIBCALL-NEXT: 
[[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C1]](s64) + ; LIBCALL-NEXT: $x10 = COPY [[COPY]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ASHR]](s64) + ; LIBCALL-NEXT: $x12 = COPY [[COPY1]](s64) + ; LIBCALL-NEXT: $x13 = COPY [[ASHR1]](s64) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__multi3, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit-def $x10, implicit-def $x11 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x11 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; LIBCALL-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY2]], [[C2]](s64) + ; LIBCALL-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C2]](s64) + ; LIBCALL-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; LIBCALL-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[ASHR2]], [[C3]](s64) + ; LIBCALL-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; LIBCALL-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[ASHR2]] + ; LIBCALL-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[COPY3]], [[ASHR3]] + ; LIBCALL-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]] + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[OR]](s64), [[C4]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s64) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s64), %3:_(s1) = G_SMULO %0, %1 + %4:_(s64) = G_ANYEXT %3(s1) + $x10 = COPY %2(s64) + $x11 = COPY %4(s64) + PseudoRET implicit $x10, implicit $x11 + +... 
+--- +name: umulo_i8 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: umulo_i8 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[MUL]], [[C2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[MUL]](s64), [[AND2]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s64) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: umulo_i8 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; LIBCALL-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; LIBCALL-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; LIBCALL-NEXT: $x10 = COPY [[AND]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[AND1]](s64) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; LIBCALL-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY2]](s64), [[AND2]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s64) + ; LIBCALL-NEXT: PseudoRET implicit $x10, 
implicit $x11 + %2:_(s64) = COPY $x10 + %0:_(s8) = G_TRUNC %2(s64) + %3:_(s64) = COPY $x11 + %1:_(s8) = G_TRUNC %3(s64) + %4:_(s8), %5:_(s1) = G_UMULO %0, %1 + %6:_(s64) = G_ANYEXT %4(s8) + %7:_(s64) = G_ANYEXT %5(s1) + $x10 = COPY %6(s64) + $x11 = COPY %7(s64) + PseudoRET implicit $x10, implicit $x11 + +... +--- +name: umulo_i16 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: umulo_i16 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[MUL]], [[C2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[MUL]](s64), [[AND2]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s64) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: umulo_i16 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; LIBCALL-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; LIBCALL-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; LIBCALL-NEXT: $x10 = COPY [[AND]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[AND1]](s64) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: 
[[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; LIBCALL-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY2]](s64), [[AND2]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s64) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %2:_(s64) = COPY $x10 + %0:_(s16) = G_TRUNC %2(s64) + %3:_(s64) = COPY $x11 + %1:_(s16) = G_TRUNC %3(s64) + %4:_(s16), %5:_(s1) = G_UMULO %0, %1 + %6:_(s64) = G_ANYEXT %4(s16) + %7:_(s64) = G_ANYEXT %5(s1) + $x10 = COPY %6(s64) + $x11 = COPY %7(s64) + PseudoRET implicit $x10, implicit $x11 + +... +--- +name: umulo_i32 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: umulo_i32 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[MUL]], [[C2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[MUL]](s64), [[AND2]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s64) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: umulo_i32 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; LIBCALL-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; 
LIBCALL-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; LIBCALL-NEXT: $x10 = COPY [[AND]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[AND1]](s64) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; LIBCALL-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; LIBCALL-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY2]](s64), [[AND2]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s64) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %2:_(s64) = COPY $x10 + %0:_(s32) = G_TRUNC %2(s64) + %3:_(s64) = COPY $x11 + %1:_(s32) = G_TRUNC %3(s64) + %4:_(s32), %5:_(s1) = G_UMULO %0, %1 + %6:_(s64) = G_ANYEXT %4(s32) + %7:_(s64) = G_ANYEXT %5(s1) + $x10 = COPY %6(s64) + $x11 = COPY %7(s64) + PseudoRET implicit $x10, implicit $x11 + +... 
+--- +name: umulo_i64 +body: | + bb.1: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: umulo_i64 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]] + ; CHECK-NEXT: $x10 = COPY [[MUL]](s64) + ; CHECK-NEXT: $x11 = COPY [[ICMP]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; LIBCALL-LABEL: name: umulo_i64 + ; LIBCALL: liveins: $x10, $x11 + ; LIBCALL-NEXT: {{ $}} + ; LIBCALL-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LIBCALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; LIBCALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; LIBCALL-NEXT: $x10 = COPY [[COPY]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[C]](s64) + ; LIBCALL-NEXT: $x12 = COPY [[COPY1]](s64) + ; LIBCALL-NEXT: $x13 = COPY [[C1]](s64) + ; LIBCALL-NEXT: PseudoCALL target-flags(riscv-call) &__multi3, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit-def $x10, implicit-def $x11 + ; LIBCALL-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10 + ; LIBCALL-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x11 + ; LIBCALL-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; LIBCALL-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; LIBCALL-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; LIBCALL-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C3]] + ; LIBCALL-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; LIBCALL-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[AND]] + ; LIBCALL-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[COPY3]], [[AND1]] + ; LIBCALL-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]] + ; LIBCALL-NEXT: 
[[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[OR]](s64), [[C4]] + ; LIBCALL-NEXT: $x10 = COPY [[COPY2]](s64) + ; LIBCALL-NEXT: $x11 = COPY [[ICMP]](s64) + ; LIBCALL-NEXT: PseudoRET implicit $x10, implicit $x11 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s64), %3:_(s1) = G_UMULO %0, %1 + %4:_(s64) = G_ANYEXT %3(s1) + $x10 = COPY %2(s64) + $x11 = COPY %4(s64) + PseudoRET implicit $x10, implicit $x11 + +... From 18d199116fe2150549110da68ac0ca8cfd80f9c8 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 13 Oct 2023 20:50:58 -0700 Subject: [PATCH 122/720] Stop including llvm/ADT/STLFunctionalExtras.h (NFC) These source files do not use function_ref. --- clang/include/clang/Analysis/FlowSensitive/Formula.h | 1 - clang/lib/ExtractAPI/API.cpp | 1 - llvm/include/llvm/IRReader/IRReader.h | 1 - llvm/include/llvm/Support/ThreadSafeAllocator.h | 1 - llvm/tools/llvm-readobj/ObjDumper.h | 1 - openmp/libomptarget/include/Utilities.h | 1 - 6 files changed, 6 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/Formula.h b/clang/include/clang/Analysis/FlowSensitive/Formula.h index 51264444fda84..9a6c6d2b2f45f 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Formula.h +++ b/clang/include/clang/Analysis/FlowSensitive/Formula.h @@ -13,7 +13,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/clang/lib/ExtractAPI/API.cpp b/clang/lib/ExtractAPI/API.cpp index 2973a31345c9b..71c655ba5b5b3 100644 --- a/clang/lib/ExtractAPI/API.cpp +++ b/clang/lib/ExtractAPI/API.cpp @@ -17,7 +17,6 @@ #include "clang/AST/CommentLexer.h" #include "clang/AST/RawCommentList.h" #include "clang/Index/USRGeneration.h" -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" #include diff --git a/llvm/include/llvm/IRReader/IRReader.h b/llvm/include/llvm/IRReader/IRReader.h 
index 644fea82bfbe0..4d690dcaf1c43 100644 --- a/llvm/include/llvm/IRReader/IRReader.h +++ b/llvm/include/llvm/IRReader/IRReader.h @@ -14,7 +14,6 @@ #ifndef LLVM_IRREADER_IRREADER_H #define LLVM_IRREADER_IRREADER_H -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitcodeReader.h" #include diff --git a/llvm/include/llvm/Support/ThreadSafeAllocator.h b/llvm/include/llvm/Support/ThreadSafeAllocator.h index 3092287e691f7..8c56bb6e5803d 100644 --- a/llvm/include/llvm/Support/ThreadSafeAllocator.h +++ b/llvm/include/llvm/Support/ThreadSafeAllocator.h @@ -10,7 +10,6 @@ #define LLVM_SUPPORT_THREADSAFEALLOCATOR_H #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/Allocator.h" #include diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h index a44fa42b85c9b..fe588047e962c 100644 --- a/llvm/tools/llvm-readobj/ObjDumper.h +++ b/llvm/tools/llvm-readobj/ObjDumper.h @@ -13,7 +13,6 @@ #include #include -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" diff --git a/openmp/libomptarget/include/Utilities.h b/openmp/libomptarget/include/Utilities.h index 7f2884ed7ea06..82593e206e4d0 100644 --- a/openmp/libomptarget/include/Utilities.h +++ b/openmp/libomptarget/include/Utilities.h @@ -14,7 +14,6 @@ #ifndef OPENMP_LIBOMPTARGET_INCLUDE_UTILITIES_H #define OPENMP_LIBOMPTARGET_INCLUDE_UTILITIES_H -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" From 643b2ccd8296a3f8a2950421e72aa5ca59e4fecc Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 13 Oct 2023 20:50:59 -0700 Subject: [PATCH 123/720] [tools] Stop including llvm/ADT/StringMap.h (NFC) These source files do not use StringMap.h. 
--- llvm/tools/dsymutil/DwarfLinkerForBinary.cpp | 1 - llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp | 1 - llvm/tools/llvm-exegesis/lib/BenchmarkResult.h | 1 - llvm/tools/llvm-exegesis/lib/LlvmState.h | 1 - llvm/tools/llvm-readobj/ObjDumper.h | 1 - llvm/tools/llvm-xray/xray-graph-diff.h | 1 - 6 files changed, 6 deletions(-) diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp index 9057a2d64092b..39776ae5a9200 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp @@ -21,7 +21,6 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp index b8e53de57bff2..02c4da11e032d 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp @@ -11,7 +11,6 @@ #include "Error.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/bit.h" #include "llvm/ObjectYAML/YAML.h" diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h index 77e0994fe0208..8a7faa0176e32 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -17,7 +17,6 @@ #include "LlvmState.h" #include "RegisterValue.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/MC/MCInst.h" diff --git a/llvm/tools/llvm-exegesis/lib/LlvmState.h b/llvm/tools/llvm-exegesis/lib/LlvmState.h index 137ba1b5a54fa..16f0def518256 100644 --- a/llvm/tools/llvm-exegesis/lib/LlvmState.h +++ 
b/llvm/tools/llvm-exegesis/lib/LlvmState.h @@ -16,7 +16,6 @@ #include "MCInstrDescView.h" #include "RegisterAliasing.h" -#include "llvm/ADT/StringMap.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h index fe588047e962c..1d679453581bc 100644 --- a/llvm/tools/llvm-readobj/ObjDumper.h +++ b/llvm/tools/llvm-readobj/ObjDumper.h @@ -14,7 +14,6 @@ #include #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/tools/llvm-xray/xray-graph-diff.h b/llvm/tools/llvm-xray/xray-graph-diff.h index 5d12c563f47c4..c2b2a938bfbc6 100644 --- a/llvm/tools/llvm-xray/xray-graph-diff.h +++ b/llvm/tools/llvm-xray/xray-graph-diff.h @@ -15,7 +15,6 @@ #define XRAY_GRAPH_DIFF_H #include "xray-graph.h" -#include "llvm/ADT/StringMap.h" #include "llvm/XRay/Graph.h" namespace llvm { From 64e7207ea5c7731f41e29291d1114e898f056248 Mon Sep 17 00:00:00 2001 From: LiqinWeng Date: Sat, 14 Oct 2023 12:18:43 +0800 Subject: [PATCH 124/720] [Test] Pre-submit tests for #68972 (#69040) --- .../CodeGen/RISCV/riscv-shifted-extend.ll | 124 ++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll diff --git a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll new file mode 100644 index 0000000000000..957f44f9f669d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64 %s + +define void @test(ptr nocapture noundef writeonly %array1, i32 noundef signext %a, i32 noundef signext %b) { +; RV64-LABEL: test: +; RV64: # 
%bb.0: # %entry +; RV64-NEXT: addiw a3, a1, 5 +; RV64-NEXT: slli a4, a3, 2 +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: sw a2, 0(a4) +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sw a2, 24(a0) +; RV64-NEXT: sw a3, 140(a0) +; RV64-NEXT: ret +entry: + %add = add nsw i32 %a, 5 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, ptr %array1, i64 %idxprom + store i32 %b, ptr %arrayidx, align 4 + %add3 = add nsw i32 %a, 6 + %idxprom4 = sext i32 %add3 to i64 + %arrayidx5 = getelementptr inbounds i32, ptr %array1, i64 %idxprom4 + store i32 %b, ptr %arrayidx5, align 4 + %add6 = add nsw i32 %a, 35 + %idxprom7 = sext i32 %add6 to i64 + %arrayidx8 = getelementptr inbounds i32, ptr %array1, i64 %idxprom7 + store i32 %add, ptr %arrayidx8, align 4 + ret void +} + +; test of jumpping, find add's operand has one more use can simplified +define void @test1(ptr nocapture noundef %array1, i32 noundef signext %a, i32 noundef signext %b, i32 noundef signext %x) { +; RV64-LABEL: test1: +; RV64: # %bb.0: # %entry +; RV64-NEXT: addiw a4, a1, 5 +; RV64-NEXT: slli a5, a4, 2 +; RV64-NEXT: add a5, a0, a5 +; RV64-NEXT: mv a6, a4 +; RV64-NEXT: bgtz a3, .LBB1_2 +; RV64-NEXT: # %bb.1: # %entry +; RV64-NEXT: mv a6, a2 +; RV64-NEXT: .LBB1_2: # %entry +; RV64-NEXT: sw a6, 0(a5) +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sw a6, 24(a0) +; RV64-NEXT: sw a4, 140(a0) +; RV64-NEXT: ret +entry: + %add = add nsw i32 %a, 5 + %cmp = icmp sgt i32 %x, 0 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, ptr %array1, i64 %idxprom + %add.b = select i1 %cmp, i32 %add, i32 %b + store i32 %add.b, ptr %arrayidx, align 4 + %add5 = add nsw i32 %a, 6 + %idxprom6 = sext i32 %add5 to i64 + %arrayidx7 = getelementptr inbounds i32, ptr %array1, i64 %idxprom6 + store i32 %add.b, ptr %arrayidx7, align 4 + %add8 = add nsw i32 %a, 35 + %idxprom9 = sext i32 %add8 to i64 + %arrayidx10 = getelementptr inbounds i32, ptr %array1, i64 
%idxprom9 + store i32 %add, ptr %arrayidx10, align 4 + ret void +} + +define void @test2(ptr nocapture noundef writeonly %array1, i64 noundef %a, i64 noundef %b) local_unnamed_addr #0 { +; RV64-LABEL: test2: +; RV64: # %bb.0: # %entry +; RV64-NEXT: addi a3, a1, 5 +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: sd a2, 0(a4) +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sd a2, 48(a0) +; RV64-NEXT: sd a3, 280(a0) +; RV64-NEXT: ret +entry: + %add = add nsw i64 %a, 5 + %arrayidx = getelementptr inbounds i64, ptr %array1, i64 %add + store i64 %b, ptr %arrayidx, align 8 + %add2 = add nsw i64 %a, 6 + %arrayidx3 = getelementptr inbounds i64, ptr %array1, i64 %add2 + store i64 %b, ptr %arrayidx3, align 8 + %add4 = add nsw i64 %a, 35 + %arrayidx5 = getelementptr inbounds i64, ptr %array1, i64 %add4 + store i64 %add, ptr %arrayidx5, align 8 + ret void +} + +define void @test3(ptr nocapture noundef %array1, i64 noundef %a, i64 noundef %b, i64 noundef %x) { +; RV64-LABEL: test3: +; RV64: # %bb.0: # %entry +; RV64-NEXT: addi a4, a1, 5 +; RV64-NEXT: mv a5, a4 +; RV64-NEXT: bgtz a3, .LBB3_2 +; RV64-NEXT: # %bb.1: # %entry +; RV64-NEXT: mv a5, a2 +; RV64-NEXT: .LBB3_2: # %entry +; RV64-NEXT: slli a2, a4, 3 +; RV64-NEXT: add a2, a0, a2 +; RV64-NEXT: sd a5, 0(a2) +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sd a5, 48(a0) +; RV64-NEXT: sd a4, 280(a0) +; RV64-NEXT: ret +entry: + %add = add nsw i64 %a, 5 + %cmp = icmp sgt i64 %x, 0 + %spec.select = select i1 %cmp, i64 %add, i64 %b + %0 = getelementptr inbounds i64, ptr %array1, i64 %add + store i64 %spec.select, ptr %0, align 8 + %add3 = add nsw i64 %a, 6 + %arrayidx4 = getelementptr inbounds i64, ptr %array1, i64 %add3 + store i64 %spec.select, ptr %arrayidx4, align 8 + %add5 = add nsw i64 %a, 35 + %arrayidx6 = getelementptr inbounds i64, ptr %array1, i64 %add5 + store i64 %add, ptr %arrayidx6, align 8 + ret void +} From ece5dd101c7e4dc2fd23428abd312f75fd3d3eaf 
Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 13 Oct 2023 21:34:23 -0700 Subject: [PATCH 125/720] [clang] Stop including llvm/ADT/StringMap.h (NFC) These source files do not use StringMap.h. --- clang/lib/ASTMatchers/GtestMatchers.cpp | 1 - clang/lib/Basic/Sarif.cpp | 1 - clang/lib/Driver/Multilib.cpp | 1 - clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp | 1 - clang/lib/Support/RISCVVIntrinsicUtils.cpp | 1 - clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp | 1 - 6 files changed, 6 deletions(-) diff --git a/clang/lib/ASTMatchers/GtestMatchers.cpp b/clang/lib/ASTMatchers/GtestMatchers.cpp index 6e4c12f319692..a556d8ef2da06 100644 --- a/clang/lib/ASTMatchers/GtestMatchers.cpp +++ b/clang/lib/ASTMatchers/GtestMatchers.cpp @@ -21,7 +21,6 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" namespace clang { diff --git a/clang/lib/Basic/Sarif.cpp b/clang/lib/Basic/Sarif.cpp index bef948181ec01..3476103cc39d4 100644 --- a/clang/lib/Basic/Sarif.cpp +++ b/clang/lib/Basic/Sarif.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/JSON.h" diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index a37dffc8a6f1d..ba466af39e2dc 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -10,7 +10,6 @@ #include "clang/Basic/LLVM.h" #include "clang/Basic/Version.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp index 66e9a501c348e..268fc742f050f 100644 
--- a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp @@ -32,7 +32,6 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp index c105db434dc43..597ee194fc8d4 100644 --- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp +++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp @@ -10,7 +10,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp index 90c475e541f4c..7ad6c19482b11 100644 --- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp +++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" From 514381840c6d7aa775a092556992c87f022a361f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 13 Oct 2023 22:32:06 -0700 Subject: [PATCH 126/720] [RISCV] Move hasOneUse() call after opcode check. hasOneUse can be more expensive for nodes with multiple outputs. It's better to check the opcode first to skip nodes with multiple outputs. I have not seen an issue from this, just noticed while reviewing code for a possible enhancement. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5cf5ee496656d..d7552317fd8bc 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11999,7 +11999,7 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, } // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) - if (N0.hasOneUse() && N0.getOpcode() == ISD::SETCC && isOneConstant(N1)) { + if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) { auto *ConstN00 = dyn_cast(N0.getOperand(0)); ISD::CondCode CC = cast(N0.getOperand(2))->get(); if (ConstN00 && CC == ISD::SETLT) { From 02f67c097de12dc9f6c97a68d9e180af79a2483b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 13 Oct 2023 23:16:25 -0700 Subject: [PATCH 127/720] Use llvm::endianness::{big,little,native} (NFC) Note that llvm::support::endianness has been renamed to llvm::endianness while becoming an enum class. This patch replaces {big,little,native} with llvm::endianness::{big,little,native}. This patch completes the migration to llvm::endianness and llvm::endianness::{big,little,native}. 
I'll post a separate patch to remove the migration helpers in llvm/Support/Endian.h: using endianness = llvm::endianness; constexpr llvm::endianness big = llvm::endianness::big; constexpr llvm::endianness little = llvm::endianness::little; constexpr llvm::endianness native = llvm::endianness::native; --- clang/lib/CodeGen/CodeGenPGO.cpp | 6 +- clang/lib/Serialization/ASTReader.cpp | 89 ++++++----- clang/lib/Serialization/ASTWriter.cpp | 25 +-- clang/lib/Serialization/GlobalModuleIndex.cpp | 18 ++- .../lib/Serialization/MultiOnDiskHashTable.h | 8 +- llvm/include/llvm/Bitstream/BitstreamWriter.h | 13 +- .../llvm/ProfileData/InstrProfReader.h | 6 +- llvm/include/llvm/ProfileData/MemProf.h | 44 ++++-- llvm/include/llvm/Support/Endian.h | 147 ++++++++++++------ llvm/include/llvm/Support/MD5.h | 4 +- llvm/include/llvm/Support/OnDiskHashTable.h | 21 ++- llvm/lib/ExecutionEngine/JITLink/aarch32.cpp | 6 +- llvm/lib/MC/MCPseudoProbe.cpp | 2 +- llvm/lib/ProfileData/InstrProf.cpp | 11 +- llvm/lib/ProfileData/InstrProfReader.cpp | 72 +++++---- llvm/lib/ProfileData/InstrProfWriter.cpp | 7 +- llvm/lib/ProfileData/MemProf.cpp | 23 +-- llvm/lib/ProfileData/RawMemProfReader.cpp | 18 ++- llvm/lib/ProfileData/SampleProfReader.cpp | 6 +- .../DebugInfo/MSF/MappedBlockStreamTest.cpp | 6 +- .../unittests/DebugInfo/PDB/HashTableTest.cpp | 4 +- .../DebugInfo/PDB/StringTableBuilderTest.cpp | 4 +- .../ExecutionEngine/JITLink/AArch32Tests.cpp | 24 +-- llvm/unittests/Support/BinaryStreamTest.cpp | 5 +- llvm/unittests/Support/EndianStreamTest.cpp | 22 +-- llvm/unittests/Support/EndianTest.cpp | 72 +++++---- 26 files changed, 409 insertions(+), 254 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index df6c76cde95f8..63cdd0a047bcd 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -755,7 +755,8 @@ void PGOHash::combine(HashType Type) { // Pass through MD5 if enough work has built up. 
if (Count && Count % NumTypesPerWord == 0) { using namespace llvm::support; - uint64_t Swapped = endian::byte_swap(Working); + uint64_t Swapped = + endian::byte_swap(Working); MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); Working = 0; } @@ -781,7 +782,8 @@ uint64_t PGOHash::finalize() { MD5.update({(uint8_t)Working}); } else { using namespace llvm::support; - uint64_t Swapped = endian::byte_swap(Working); + uint64_t Swapped = + endian::byte_swap(Working); MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); } } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 9ea8c8eacaa93..cce403d7c6c44 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -912,9 +912,10 @@ ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) { using namespace llvm::support; SelectorTable &SelTable = Reader.getContext().Selectors; - unsigned N = endian::readNext(d); + unsigned N = + endian::readNext(d); IdentifierInfo *FirstII = Reader.getLocalIdentifier( - F, endian::readNext(d)); + F, endian::readNext(d)); if (N == 0) return SelTable.getNullarySelector(FirstII); else if (N == 1) @@ -924,7 +925,7 @@ ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) { Args.push_back(FirstII); for (unsigned I = 1; I != N; ++I) Args.push_back(Reader.getLocalIdentifier( - F, endian::readNext(d))); + F, endian::readNext(d))); return SelTable.getSelector(N, Args.data()); } @@ -937,9 +938,11 @@ ASTSelectorLookupTrait::ReadData(Selector, const unsigned char* d, data_type Result; Result.ID = Reader.getGlobalSelectorID( - F, endian::readNext(d)); - unsigned FullInstanceBits = endian::readNext(d); - unsigned FullFactoryBits = endian::readNext(d); + F, endian::readNext(d)); + unsigned FullInstanceBits = + endian::readNext(d); + unsigned FullFactoryBits = + endian::readNext(d); Result.InstanceBits = FullInstanceBits & 0x3; Result.InstanceHasMoreThanOneDecl = 
(FullInstanceBits >> 2) & 0x1; Result.FactoryBits = FullFactoryBits & 0x3; @@ -950,14 +953,16 @@ ASTSelectorLookupTrait::ReadData(Selector, const unsigned char* d, // Load instance methods for (unsigned I = 0; I != NumInstanceMethods; ++I) { if (ObjCMethodDecl *Method = Reader.GetLocalDeclAs( - F, endian::readNext(d))) + F, + endian::readNext(d))) Result.Instance.push_back(Method); } // Load factory methods for (unsigned I = 0; I != NumFactoryMethods; ++I) { if (ObjCMethodDecl *Method = Reader.GetLocalDeclAs( - F, endian::readNext(d))) + F, + endian::readNext(d))) Result.Factory.push_back(Method); } @@ -998,7 +1003,8 @@ static bool readBit(unsigned &Bits) { IdentID ASTIdentifierLookupTrait::ReadIdentifierID(const unsigned char *d) { using namespace llvm::support; - unsigned RawID = endian::readNext(d); + unsigned RawID = + endian::readNext(d); return Reader.getGlobalIdentifierID(F, RawID >> 1); } @@ -1016,7 +1022,8 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k, unsigned DataLen) { using namespace llvm::support; - unsigned RawID = endian::readNext(d); + unsigned RawID = + endian::readNext(d); bool IsInteresting = RawID & 0x01; // Wipe out the "is interesting" bit. @@ -1039,8 +1046,10 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k, return II; } - unsigned ObjCOrBuiltinID = endian::readNext(d); - unsigned Bits = endian::readNext(d); + unsigned ObjCOrBuiltinID = + endian::readNext(d); + unsigned Bits = + endian::readNext(d); bool CPlusPlusOperatorKeyword = readBit(Bits); bool HasRevertedTokenIDToIdentifier = readBit(Bits); bool Poisoned = readBit(Bits); @@ -1069,7 +1078,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k, // definition. 
if (HadMacroDefinition) { uint32_t MacroDirectivesOffset = - endian::readNext(d); + endian::readNext(d); DataLen -= 4; Reader.addPendingMacro(II, &F, MacroDirectivesOffset); @@ -1083,7 +1092,8 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k, SmallVector DeclIDs; for (; DataLen > 0; DataLen -= 4) DeclIDs.push_back(Reader.getGlobalDeclID( - F, endian::readNext(d))); + F, + endian::readNext(d))); Reader.SetGloballyVisibleDecls(II, DeclIDs); } @@ -1152,7 +1162,8 @@ ModuleFile * ASTDeclContextNameLookupTrait::ReadFileRef(const unsigned char *&d) { using namespace llvm::support; - uint32_t ModuleFileID = endian::readNext(d); + uint32_t ModuleFileID = + endian::readNext(d); return Reader.getLocalModuleFile(F, ModuleFileID); } @@ -1172,15 +1183,18 @@ ASTDeclContextNameLookupTrait::ReadKey(const unsigned char *d, unsigned) { case DeclarationName::CXXLiteralOperatorName: case DeclarationName::CXXDeductionGuideName: Data = (uint64_t)Reader.getLocalIdentifier( - F, endian::readNext(d)); + F, endian::readNext(d)); break; case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: case DeclarationName::ObjCMultiArgSelector: Data = - (uint64_t)Reader.getLocalSelector( - F, endian::readNext( - d)).getAsOpaquePtr(); + (uint64_t)Reader + .getLocalSelector( + F, + endian::readNext( + d)) + .getAsOpaquePtr(); break; case DeclarationName::CXXOperatorName: Data = *d++; // OverloadedOperatorKind @@ -1203,7 +1217,8 @@ void ASTDeclContextNameLookupTrait::ReadDataInto(internal_key_type, using namespace llvm::support; for (unsigned NumDecls = DataLen / 4; NumDecls; --NumDecls) { - uint32_t LocalID = endian::readNext(d); + uint32_t LocalID = + endian::readNext(d); Val.insert(Reader.getGlobalDeclID(F, LocalID)); } } @@ -2010,8 +2025,10 @@ HeaderFileInfoTrait::ReadKey(const unsigned char *d, unsigned) { using namespace llvm::support; internal_key_type ikey; - ikey.Size = off_t(endian::readNext(d)); - ikey.ModTime = 
time_t(endian::readNext(d)); + ikey.Size = + off_t(endian::readNext(d)); + ikey.ModTime = time_t( + endian::readNext(d)); ikey.Filename = (const char *)d; ikey.Imported = true; return ikey; @@ -2039,9 +2056,9 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, HFI.DirInfo = (Flags >> 1) & 0x07; HFI.IndexHeaderMapHeader = Flags & 0x01; HFI.ControllingMacroID = Reader.getGlobalIdentifierID( - M, endian::readNext(d)); + M, endian::readNext(d)); if (unsigned FrameworkOffset = - endian::readNext(d)) { + endian::readNext(d)) { // The framework offset is 1 greater than the actual offset, // since 0 is used as an indicator for "no framework name". StringRef FrameworkName(FrameworkStrings + FrameworkOffset - 1); @@ -2051,7 +2068,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, assert((End - d) % 4 == 0 && "Wrong data length in HeaderFileInfo deserialization"); while (d != End) { - uint32_t LocalSMID = endian::readNext(d); + uint32_t LocalSMID = + endian::readNext(d); auto HeaderRole = static_cast(LocalSMID & 7); LocalSMID >>= 3; @@ -4030,8 +4048,9 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { // how it goes... 
using namespace llvm::support; ModuleKind Kind = static_cast( - endian::readNext(Data)); - uint16_t Len = endian::readNext(Data); + endian::readNext(Data)); + uint16_t Len = + endian::readNext(Data); StringRef Name = StringRef((const char*)Data, Len); Data += Len; ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule || @@ -4047,21 +4066,21 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { } SourceLocation::UIntTy SLocOffset = - endian::readNext(Data); + endian::readNext(Data); uint32_t IdentifierIDOffset = - endian::readNext(Data); + endian::readNext(Data); uint32_t MacroIDOffset = - endian::readNext(Data); + endian::readNext(Data); uint32_t PreprocessedEntityIDOffset = - endian::readNext(Data); + endian::readNext(Data); uint32_t SubmoduleIDOffset = - endian::readNext(Data); + endian::readNext(Data); uint32_t SelectorIDOffset = - endian::readNext(Data); + endian::readNext(Data); uint32_t DeclIDOffset = - endian::readNext(Data); + endian::readNext(Data); uint32_t TypeIndexOffset = - endian::readNext(Data); + endian::readNext(Data); auto mapOffset = [&](uint32_t Offset, uint32_t BaseOffset, RemapBuilder &Remap) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 27700c711d52f..739344b9a128d 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1873,7 +1873,7 @@ namespace { void EmitKey(raw_ostream& Out, key_type_ref key, unsigned KeyLen) { using namespace llvm::support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); LE.write(key.Size); KeyLen -= 8; LE.write(key.ModTime); @@ -1885,7 +1885,7 @@ namespace { data_type_ref Data, unsigned DataLen) { using namespace llvm::support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); (void)Start; unsigned char Flags = (Data.AlreadyIncluded << 6) @@ -2053,7 +2053,7 @@ void 
ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { llvm::raw_svector_ostream Out(TableData); // Make sure that no bucket is at offset 0 - endian::write(Out, 0, little); + endian::write(Out, 0, llvm::endianness::little); BucketOffset = Generator.Emit(Out, GeneratorTrait); } @@ -3313,7 +3313,7 @@ class ASTMethodPoolTrait { void EmitKey(raw_ostream& Out, Selector Sel, unsigned) { using namespace llvm::support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); assert((Start >> 32) == 0 && "Selector key offset too large"); Writer.SetSelectorOffset(Sel, Start); @@ -3330,7 +3330,7 @@ class ASTMethodPoolTrait { data_type_ref Methods, unsigned DataLen) { using namespace llvm::support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); (void)Start; LE.write(Methods.ID); unsigned NumInstanceMethods = 0; @@ -3453,7 +3453,7 @@ void ASTWriter::WriteSelectors(Sema &SemaRef) { ASTMethodPoolTrait Trait(*this); llvm::raw_svector_ostream Out(MethodPool); // Make sure that no bucket is at offset 0 - endian::write(Out, 0, little); + endian::write(Out, 0, llvm::endianness::little); BucketOffset = Generator.Emit(Out, Trait); } @@ -3650,7 +3650,7 @@ class ASTIdentifierTableTrait { IdentID ID, unsigned) { using namespace llvm::support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); auto MacroOffset = Writer.getMacroDirectivesOffset(II); if (!isInterestingIdentifier(II, MacroOffset)) { @@ -3749,7 +3749,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, llvm::raw_svector_ostream Out(IdentifierTable); // Make sure that no bucket is at offset 0 - endian::write(Out, 0, little); + endian::write(Out, 0, llvm::endianness::little); BucketOffset = Generator.Emit(Out, Trait); } @@ -3844,7 +3844,8 @@ class ASTDeclContextNameLookupTrait { using namespace llvm::support; - endian::write(Out, 
Writer.getChain()->getModuleFileID(F), little); + endian::write(Out, Writer.getChain()->getModuleFileID(F), + llvm::endianness::little); } std::pair EmitKeyDataLength(raw_ostream &Out, @@ -3879,7 +3880,7 @@ class ASTDeclContextNameLookupTrait { void EmitKey(raw_ostream &Out, DeclarationNameKey Name, unsigned) { using namespace llvm::support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); LE.write(Name.getKind()); switch (Name.getKind()) { case DeclarationName::Identifier: @@ -3911,7 +3912,7 @@ class ASTDeclContextNameLookupTrait { unsigned DataLen) { using namespace llvm::support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); (void)Start; for (unsigned I = Lookup.first, N = Lookup.second; I != N; ++I) LE.write(DeclIDs[I]); @@ -5024,7 +5025,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, for (ModuleFile &M : Chain->ModuleMgr) { using namespace llvm::support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); LE.write(static_cast(M.Kind)); StringRef Name = M.isModule() ? 
M.ModuleName : M.FileName; LE.write(Name.size()); diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp index b4a49972ace2e..fb80a1998d0ef 100644 --- a/clang/lib/Serialization/GlobalModuleIndex.cpp +++ b/clang/lib/Serialization/GlobalModuleIndex.cpp @@ -89,8 +89,10 @@ class IdentifierIndexReaderTrait { static std::pair ReadKeyDataLength(const unsigned char*& d) { using namespace llvm::support; - unsigned KeyLen = endian::readNext(d); - unsigned DataLen = endian::readNext(d); + unsigned KeyLen = + endian::readNext(d); + unsigned DataLen = + endian::readNext(d); return std::make_pair(KeyLen, DataLen); } @@ -111,7 +113,8 @@ class IdentifierIndexReaderTrait { data_type Result; while (DataLen > 0) { - unsigned ID = endian::readNext(d); + unsigned ID = + endian::readNext(d); Result.push_back(ID); DataLen -= 4; } @@ -511,7 +514,8 @@ namespace { // The first bit indicates whether this identifier is interesting. // That's all we care about. using namespace llvm::support; - unsigned RawID = endian::readNext(d); + unsigned RawID = + endian::readNext(d); bool IsInteresting = RawID & 0x01; return std::make_pair(k, IsInteresting); } @@ -729,7 +733,7 @@ class IdentifierIndexWriterTrait { std::pair EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) { using namespace llvm::support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); unsigned KeyLen = Key.size(); unsigned DataLen = Data.size() * 4; LE.write(KeyLen); @@ -745,7 +749,7 @@ class IdentifierIndexWriterTrait { unsigned DataLen) { using namespace llvm::support; for (unsigned I = 0, N = Data.size(); I != N; ++I) - endian::write(Out, Data[I], little); + endian::write(Out, Data[I], llvm::endianness::little); } }; @@ -824,7 +828,7 @@ bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) { using namespace llvm::support; llvm::raw_svector_ostream Out(IdentifierTable); // Make sure that no bucket is at 
offset 0 - endian::write(Out, 0, little); + endian::write(Out, 0, llvm::endianness::little); BucketOffset = Generator.Emit(Out, Trait); } diff --git a/clang/lib/Serialization/MultiOnDiskHashTable.h b/clang/lib/Serialization/MultiOnDiskHashTable.h index adc97d57e0ac7..2402a628b512f 100644 --- a/clang/lib/Serialization/MultiOnDiskHashTable.h +++ b/clang/lib/Serialization/MultiOnDiskHashTable.h @@ -199,10 +199,12 @@ template class MultiOnDiskHashTable { storage_type Ptr = Data; - uint32_t BucketOffset = endian::readNext(Ptr); + uint32_t BucketOffset = + endian::readNext(Ptr); // Read the list of overridden files. - uint32_t NumFiles = endian::readNext(Ptr); + uint32_t NumFiles = + endian::readNext(Ptr); // FIXME: Add a reserve() to TinyPtrVector so that we don't need to make // an additional copy. llvm::SmallVector OverriddenFiles; @@ -311,7 +313,7 @@ class MultiOnDiskHashTableGenerator { // Write our header information. { - endian::Writer Writer(OutStream, little); + endian::Writer Writer(OutStream, llvm::endianness::little); // Reserve four bytes for the bucket offset. 
Writer.write(0); diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h index 8a59d0444e367..f7d362b5d70ce 100644 --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -139,10 +139,11 @@ class BitstreamWriter { uint64_t NumOfFlushedBytes = GetNumOfFlushedBytes(); if (ByteNo >= NumOfFlushedBytes) { - assert((!endian::readAtBitAlignment( + assert((!endian::readAtBitAlignment( &Out[ByteNo - NumOfFlushedBytes], StartBit)) && "Expected to be patching over 0-value placeholders"); - endian::writeAtBitAlignment( + endian::writeAtBitAlignment( &Out[ByteNo - NumOfFlushedBytes], NewByte, StartBit); return; } @@ -171,14 +172,14 @@ class BitstreamWriter { assert(BytesRead >= 0 && static_cast(BytesRead) == BytesFromDisk); for (size_t i = 0; i < BytesFromBuffer; ++i) Bytes[BytesFromDisk + i] = Out[i]; - assert((!endian::readAtBitAlignment( - Bytes, StartBit)) && + assert((!endian::readAtBitAlignment(Bytes, StartBit)) && "Expected to be patching over 0-value placeholders"); } // Update Bytes in terms of bit offset and value. - endian::writeAtBitAlignment(Bytes, NewByte, - StartBit); + endian::writeAtBitAlignment( + Bytes, NewByte, StartBit); // Copy updated data back to the file FS and the buffer Out. 
FS->seek(ByteNo); diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 172b4c9f61875..5f54cbeb1b01e 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -500,8 +500,10 @@ class InstrProfLookupTrait { ReadKeyDataLength(const unsigned char *&D) { using namespace support; - offset_type KeyLen = endian::readNext(D); - offset_type DataLen = endian::readNext(D); + offset_type KeyLen = + endian::readNext(D); + offset_type DataLen = + endian::readNext(D); return std::make_pair(KeyLen, DataLen); } diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index d0ba5b10be02e..1a066c10c1361 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -50,7 +50,7 @@ struct PortableMemInfoBlock { switch (Id) { #define MIBEntryDef(NameTag, Name, Type) \ case Meta::Name: { \ - Name = endian::readNext(Ptr); \ + Name = endian::readNext(Ptr); \ } break; #include "llvm/ProfileData/MIBEntryDef.inc" #undef MIBEntryDef @@ -66,7 +66,7 @@ struct PortableMemInfoBlock { void serialize(const MemProfSchema &Schema, raw_ostream &OS) const { using namespace support; - endian::Writer LE(OS, little); + endian::Writer LE(OS, llvm::endianness::little); for (const Meta Id : Schema) { switch (Id) { #define MIBEntryDef(NameTag, Name, Type) \ @@ -187,7 +187,7 @@ struct Frame { void serialize(raw_ostream &OS) const { using namespace support; - endian::Writer LE(OS, little); + endian::Writer LE(OS, llvm::endianness::little); // If the type of the GlobalValue::GUID changes, then we need to update // the reader and the writer. 
@@ -204,10 +204,14 @@ struct Frame { static Frame deserialize(const unsigned char *Ptr) { using namespace support; - const uint64_t F = endian::readNext(Ptr); - const uint32_t L = endian::readNext(Ptr); - const uint32_t C = endian::readNext(Ptr); - const bool I = endian::readNext(Ptr); + const uint64_t F = + endian::readNext(Ptr); + const uint32_t L = + endian::readNext(Ptr); + const uint32_t C = + endian::readNext(Ptr); + const bool I = + endian::readNext(Ptr); return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C, /*IsInlineFrame=*/I); } @@ -466,14 +470,17 @@ class RecordLookupTrait { ReadKeyDataLength(const unsigned char *&D) { using namespace support; - offset_type KeyLen = endian::readNext(D); - offset_type DataLen = endian::readNext(D); + offset_type KeyLen = + endian::readNext(D); + offset_type DataLen = + endian::readNext(D); return std::make_pair(KeyLen, DataLen); } uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { using namespace support; - return endian::readNext(D); + return endian::readNext(D); } data_type ReadData(uint64_t K, const unsigned char *D, @@ -514,7 +521,7 @@ class RecordWriterTrait { EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { using namespace support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); offset_type N = sizeof(K); LE.write(N); offset_type M = V.serializedSize(); @@ -524,7 +531,7 @@ class RecordWriterTrait { void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { using namespace support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); LE.write(K); } @@ -552,7 +559,7 @@ class FrameWriterTrait { static std::pair EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { using namespace support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); offset_type N = sizeof(K); LE.write(N); offset_type M = V.serializedSize(); @@ -562,7 +569,7 @@ class 
FrameWriterTrait { void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { using namespace support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); LE.write(K); } @@ -593,14 +600,17 @@ class FrameLookupTrait { ReadKeyDataLength(const unsigned char *&D) { using namespace support; - offset_type KeyLen = endian::readNext(D); - offset_type DataLen = endian::readNext(D); + offset_type KeyLen = + endian::readNext(D); + offset_type DataLen = + endian::readNext(D); return std::make_pair(KeyLen, DataLen); } uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { using namespace support; - return endian::readNext(D); + return endian::readNext(D); } data_type ReadData(uint64_t K, const unsigned char *D, diff --git a/llvm/include/llvm/Support/Endian.h b/llvm/include/llvm/Support/Endian.h index 808446e615458..d4fc6b59e252f 100644 --- a/llvm/include/llvm/Support/Endian.h +++ b/llvm/include/llvm/Support/Endian.h @@ -53,7 +53,7 @@ constexpr endianness system_endianness() { return llvm::endianness::native; } template [[nodiscard]] inline value_type byte_swap(value_type value, endianness endian) { - if (endian != native) + if (endian != llvm::endianness::native) sys::swapByteOrder(value); return value; } @@ -273,85 +273,120 @@ struct packed_endian_specific_integral { } // end namespace detail using ulittle16_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using ulittle32_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using ulittle64_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using little16_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using little32_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using little64_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; 
using aligned_ulittle16_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_ulittle32_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_ulittle64_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_little16_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_little32_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_little64_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using ubig16_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using ubig32_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using ubig64_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using big16_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using big32_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using big64_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_ubig16_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_ubig32_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_ubig64_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_big16_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_big32_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using aligned_big64_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using unaligned_uint16_t = - 
detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using unaligned_uint32_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using unaligned_uint64_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using unaligned_int16_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using unaligned_int32_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; using unaligned_int64_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; template -using little_t = detail::packed_endian_specific_integral; +using little_t = + detail::packed_endian_specific_integral; template -using big_t = detail::packed_endian_specific_integral; +using big_t = detail::packed_endian_specific_integral; template using aligned_little_t = - detail::packed_endian_specific_integral; + detail::packed_endian_specific_integral; template -using aligned_big_t = detail::packed_endian_specific_integral; +using aligned_big_t = + detail::packed_endian_specific_integral; namespace endian { @@ -380,17 +415,23 @@ template [[nodiscard]] inline uint64_t read64(const void *P) { } [[nodiscard]] inline uint16_t read16le(const void *P) { - return read16(P); + return read16(P); } [[nodiscard]] inline uint32_t read32le(const void *P) { - return read32(P); + return read32(P); } [[nodiscard]] inline uint64_t read64le(const void *P) { - return read64(P); + return read64(P); +} +[[nodiscard]] inline uint16_t read16be(const void *P) { + return read16(P); +} +[[nodiscard]] inline uint32_t read32be(const void *P) { + return read32(P); +} +[[nodiscard]] inline uint64_t read64be(const void *P) { + return read64(P); } -[[nodiscard]] inline uint16_t read16be(const void *P) { return read16(P); } -[[nodiscard]] inline uint32_t read32be(const void *P) { return read32(P); } -[[nodiscard]] inline uint64_t read64be(const void 
*P) { return read64(P); } template inline void write(void *P, T V) { *(detail::packed_endian_specific_integral *)P = V; @@ -416,12 +457,24 @@ template inline void write64(void *P, uint64_t V) { write(P, V); } -inline void write16le(void *P, uint16_t V) { write16(P, V); } -inline void write32le(void *P, uint32_t V) { write32(P, V); } -inline void write64le(void *P, uint64_t V) { write64(P, V); } -inline void write16be(void *P, uint16_t V) { write16(P, V); } -inline void write32be(void *P, uint32_t V) { write32(P, V); } -inline void write64be(void *P, uint64_t V) { write64(P, V); } +inline void write16le(void *P, uint16_t V) { + write16(P, V); +} +inline void write32le(void *P, uint32_t V) { + write32(P, V); +} +inline void write64le(void *P, uint64_t V) { + write64(P, V); +} +inline void write16be(void *P, uint16_t V) { + write16(P, V); +} +inline void write32be(void *P, uint32_t V) { + write32(P, V); +} +inline void write64be(void *P, uint64_t V) { + write64(P, V); +} } // end namespace endian diff --git a/llvm/include/llvm/Support/MD5.h b/llvm/include/llvm/Support/MD5.h index 61af6002696ad..0e9f22d3bfdb4 100644 --- a/llvm/include/llvm/Support/MD5.h +++ b/llvm/include/llvm/Support/MD5.h @@ -47,12 +47,12 @@ class MD5 { // Our MD5 implementation returns the result in little endian, so the low // word is first. using namespace support; - return endian::read(data()); + return endian::read(data()); } uint64_t high() const { using namespace support; - return endian::read(data() + 8); + return endian::read(data() + 8); } std::pair words() const { using namespace support; diff --git a/llvm/include/llvm/Support/OnDiskHashTable.h b/llvm/include/llvm/Support/OnDiskHashTable.h index bb90d8fc3ac7d..0a8cbbd8b1883 100644 --- a/llvm/include/llvm/Support/OnDiskHashTable.h +++ b/llvm/include/llvm/Support/OnDiskHashTable.h @@ -149,7 +149,7 @@ template class OnDiskChainedHashTableGenerator { /// Uses the provided Info instead of a stack allocated one. 
offset_type Emit(raw_ostream &Out, Info &InfoObj) { using namespace llvm::support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); // Now we're done adding entries, resize the bucket list if it's // significantly too large. (This only happens if the number of @@ -304,9 +304,11 @@ template class OnDiskChainedHashTable { "buckets should be 4-byte aligned."); using namespace llvm::support; offset_type NumBuckets = - endian::readNext(Buckets); + endian::readNext( + Buckets); offset_type NumEntries = - endian::readNext(Buckets); + endian::readNext( + Buckets); return std::make_pair(NumBuckets, NumEntries); } @@ -357,19 +359,23 @@ template class OnDiskChainedHashTable { offset_type Idx = KeyHash & (NumBuckets - 1); const unsigned char *Bucket = Buckets + sizeof(offset_type) * Idx; - offset_type Offset = endian::readNext(Bucket); + offset_type Offset = + endian::readNext( + Bucket); if (Offset == 0) return iterator(); // Empty bucket. const unsigned char *Items = Base + Offset; // 'Items' starts with a 16-bit unsigned integer representing the // number of items in this bucket. - unsigned Len = endian::readNext(Items); + unsigned Len = + endian::readNext(Items); for (unsigned i = 0; i < Len; ++i) { // Read the hash. hash_value_type ItemHash = - endian::readNext(Items); + endian::readNext(Items); // Determine the length of the key and the data. const std::pair &L = @@ -467,7 +473,8 @@ class OnDiskIterableChainedHashTable : public OnDiskChainedHashTable { // 'Items' starts with a 16-bit unsigned integer representing the // number of items in this bucket. NumItemsInBucketLeft = - endian::readNext(Ptr); + endian::readNext( + Ptr); } Ptr += sizeof(hash_value_type); // Skip the hash. // Determine the length of the key and the data. 
diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp index 10409b9bdb2aa..4aed649666544 100644 --- a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp @@ -405,10 +405,10 @@ Error applyFixupData(LinkGraph &G, Block &B, const Edge &E) { auto Write32 = [FixupPtr, Endian = G.getEndianness()](int64_t Value) { assert(isInt<32>(Value) && "Must be in signed 32-bit range"); uint32_t Imm = static_cast(Value); - if (LLVM_LIKELY(Endian == little)) - endian::write32(FixupPtr, Imm); + if (LLVM_LIKELY(Endian == llvm::endianness::little)) + endian::write32(FixupPtr, Imm); else - endian::write32(FixupPtr, Imm); + endian::write32(FixupPtr, Imm); }; Edge::Kind Kind = E.getKind(); diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index ec9d0865888e4..eb3894dbb3c25 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -343,7 +343,7 @@ template ErrorOr MCPseudoProbeDecoder::readUnencodedNumber() { if (Data + sizeof(T) > End) { return std::error_code(); } - T Val = endian::readNext(Data); + T Val = endian::readNext(Data); return ErrorOr(Val); } diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 237caaaeca5a2..ddc11304742df 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1022,10 +1022,10 @@ template static T swapToHostOrder(const unsigned char *&D, llvm::endianness Orig) { using namespace support; - if (Orig == little) - return endian::readNext(D); + if (Orig == llvm::endianness::little) + return endian::readNext(D); else - return endian::readNext(D); + return endian::readNext(D); } static std::unique_ptr allocValueProfData(uint32_t TotalSize) { @@ -1449,7 +1449,7 @@ static inline uint64_t read(const unsigned char *Buffer, size_t Offset) { uint64_t Header::formatVersion() const { using namespace support; - return endian::byte_swap(Version); + return 
endian::byte_swap(Version); } Expected

Header::readFromBuffer(const unsigned char *Buffer) { @@ -1461,7 +1461,8 @@ Expected
Header::readFromBuffer(const unsigned char *Buffer) { H.Magic = read(Buffer, offsetOf(&Header::Magic)); // Check the magic number. - uint64_t Magic = endian::byte_swap(H.Magic); + uint64_t Magic = + endian::byte_swap(H.Magic); if (Magic != IndexedInstrProf::Magic) return make_error(instrprof_error::bad_magic); diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 3800b23843fa9..a920a31d0a4b2 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -112,10 +112,11 @@ readBinaryIdsInternal(const MemoryBuffer &DataBuffer, "not enough data to read binary id length"); uint64_t BILen = 0; - if (Endian == little) - BILen = endian::readNext(BI); + if (Endian == llvm::endianness::little) + BILen = + endian::readNext(BI); else - BILen = endian::readNext(BI); + BILen = endian::readNext(BI); if (BILen == 0) return make_error(instrprof_error::malformed, @@ -800,7 +801,8 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, // Read hash. if (D + sizeof(uint64_t) >= End) return data_type(); - uint64_t Hash = endian::readNext(D); + uint64_t Hash = + endian::readNext(D); // Initialize number of counters for GET_VERSION(FormatVersion) == 1. uint64_t CountsSize = N / sizeof(uint64_t) - 1; @@ -808,7 +810,8 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { if (D + sizeof(uint64_t) > End) return data_type(); - CountsSize = endian::readNext(D); + CountsSize = + endian::readNext(D); } // Read counter values. 
if (D + CountsSize * sizeof(uint64_t) > End) @@ -817,7 +820,8 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, CounterBuffer.clear(); CounterBuffer.reserve(CountsSize); for (uint64_t J = 0; J < CountsSize; ++J) - CounterBuffer.push_back(endian::readNext(D)); + CounterBuffer.push_back( + endian::readNext(D)); DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); @@ -1001,8 +1005,8 @@ bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { if (DataBuffer.getBufferSize() < 8) return false; - uint64_t Magic = - endian::read(DataBuffer.getBufferStart()); + uint64_t Magic = endian::read( + DataBuffer.getBufferStart()); // Verify that it's magical. return Magic == IndexedInstrProf::Magic; } @@ -1016,10 +1020,10 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, if (Version >= IndexedInstrProf::Version4) { const IndexedInstrProf::Summary *SummaryInLE = reinterpret_cast(Cur); - uint64_t NFields = - endian::byte_swap(SummaryInLE->NumSummaryFields); - uint64_t NEntries = - endian::byte_swap(SummaryInLE->NumCutoffEntries); + uint64_t NFields = endian::byte_swap( + SummaryInLE->NumSummaryFields); + uint64_t NEntries = endian::byte_swap( + SummaryInLE->NumCutoffEntries); uint32_t SummarySize = IndexedInstrProf::Summary::getSize(NFields, NEntries); std::unique_ptr SummaryData = @@ -1028,7 +1032,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, const uint64_t *Src = reinterpret_cast(SummaryInLE); uint64_t *Dst = reinterpret_cast(SummaryData.get()); for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) - Dst[I] = endian::byte_swap(Src[I]); + Dst[I] = endian::byte_swap(Src[I]); SummaryEntryVector DetailedSummary; for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { @@ -1085,11 +1089,12 @@ Error IndexedInstrProfReader::readHeader() { /* UseCS */ true); // Read the hash type and start offset. 
IndexedInstrProf::HashT HashType = static_cast( - endian::byte_swap(Header->HashType)); + endian::byte_swap(Header->HashType)); if (HashType > IndexedInstrProf::HashT::Last) return error(instrprof_error::unsupported_hash_type); - uint64_t HashOffset = endian::byte_swap(Header->HashOffset); + uint64_t HashOffset = + endian::byte_swap(Header->HashOffset); // The hash table with profile counts comes next. auto IndexPtr = std::make_unique>( @@ -1100,19 +1105,23 @@ Error IndexedInstrProfReader::readHeader() { if (GET_VERSION(Header->formatVersion()) >= 8 && Header->formatVersion() & VARIANT_MASK_MEMPROF) { uint64_t MemProfOffset = - endian::byte_swap(Header->MemProfOffset); + endian::byte_swap( + Header->MemProfOffset); const unsigned char *Ptr = Start + MemProfOffset; // The value returned from RecordTableGenerator.Emit. const uint64_t RecordTableOffset = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); // The offset in the stream right before invoking // FrameTableGenerator.Emit. const uint64_t FramePayloadOffset = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); // The value returned from FrameTableGenerator.Emit. const uint64_t FrameTableOffset = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); // Read the schema. auto SchemaOr = memprof::readMemProfSchema(Ptr); @@ -1137,10 +1146,13 @@ Error IndexedInstrProfReader::readHeader() { // is higher than 9 (when it was introduced). if (GET_VERSION(Header->formatVersion()) >= 9) { uint64_t BinaryIdOffset = - endian::byte_swap(Header->BinaryIdOffset); + endian::byte_swap( + Header->BinaryIdOffset); const unsigned char *Ptr = Start + BinaryIdOffset; // Read binary ids size. - BinaryIdsSize = support::endian::readNext(Ptr); + BinaryIdsSize = + support::endian::readNext(Ptr); if (BinaryIdsSize % sizeof(uint64_t)) return error(instrprof_error::bad_header); // Set the binary ids start. 
@@ -1153,31 +1165,37 @@ Error IndexedInstrProfReader::readHeader() { if (GET_VERSION(Header->formatVersion()) >= 10 && Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) { uint64_t TemporalProfTracesOffset = - endian::byte_swap(Header->TemporalProfTracesOffset); + endian::byte_swap( + Header->TemporalProfTracesOffset); const unsigned char *Ptr = Start + TemporalProfTracesOffset; const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd(); // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize if (Ptr + 2 * sizeof(uint64_t) > PtrEnd) return error(instrprof_error::truncated); const uint64_t NumTraces = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); TemporalProfTraceStreamSize = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); for (unsigned i = 0; i < NumTraces; i++) { // Expect at least two 64 bit fields: Weight and NumFunctions if (Ptr + 2 * sizeof(uint64_t) > PtrEnd) return error(instrprof_error::truncated); TemporalProfTraceTy Trace; Trace.Weight = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); const uint64_t NumFunctions = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); // Expect at least NumFunctions 64 bit fields if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd) return error(instrprof_error::truncated); for (unsigned j = 0; j < NumFunctions; j++) { const uint64_t NameRef = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); Trace.FunctionNameRefs.push_back(NameRef); } TemporalProfTraces.push_back(std::move(Trace)); diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 2873e06266e44..6892654b00ea4 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -80,7 +80,8 @@ class ProfOStream { std::string &Data = SOStream.str(); // with flush for (int K = 0; K < NItems; K++) { for (int I = 0; I < P[K].N; I++) { - uint64_t Bytes = 
endian::byte_swap(P[K].D[I]); + uint64_t Bytes = + endian::byte_swap(P[K].D[I]); Data.replace(P[K].Pos + I * sizeof(uint64_t), sizeof(uint64_t), (const char *)&Bytes, sizeof(uint64_t)); } @@ -120,7 +121,7 @@ class InstrProfRecordWriterTrait { EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { using namespace support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); offset_type N = K.size(); LE.write(N); @@ -147,7 +148,7 @@ class InstrProfRecordWriterTrait { void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) { using namespace support; - endian::Writer LE(Out, little); + endian::Writer LE(Out, llvm::endianness::little); for (const auto &ProfileData : *V) { const InstrProfRecord &ProfRecord = ProfileData.second; if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first)) diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 3255cba4dd0ca..db34de704a3c3 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -13,7 +13,7 @@ void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, raw_ostream &OS) { using namespace support; - endian::Writer LE(OS, little); + endian::Writer LE(OS, llvm::endianness::little); LE.write(AllocSites.size()); for (const IndexedAllocationInfo &N : AllocSites) { @@ -40,13 +40,15 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, IndexedMemProfRecord Record; // Read the meminfo nodes. 
- const uint64_t NumNodes = endian::readNext(Ptr); + const uint64_t NumNodes = + endian::readNext(Ptr); for (uint64_t I = 0; I < NumNodes; I++) { IndexedAllocationInfo Node; const uint64_t NumFrames = - endian::readNext(Ptr); + endian::readNext(Ptr); for (uint64_t J = 0; J < NumFrames; J++) { - const FrameId Id = endian::readNext(Ptr); + const FrameId Id = + endian::readNext(Ptr); Node.CallStack.push_back(Id); } Node.Info.deserialize(Schema, Ptr); @@ -55,14 +57,16 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, } // Read the callsite information. - const uint64_t NumCtxs = endian::readNext(Ptr); + const uint64_t NumCtxs = + endian::readNext(Ptr); for (uint64_t J = 0; J < NumCtxs; J++) { const uint64_t NumFrames = - endian::readNext(Ptr); + endian::readNext(Ptr); llvm::SmallVector Frames; Frames.reserve(NumFrames); for (uint64_t K = 0; K < NumFrames; K++) { - const FrameId Id = endian::readNext(Ptr); + const FrameId Id = + endian::readNext(Ptr); Frames.push_back(Id); } Record.CallSites.push_back(Frames); @@ -90,7 +94,7 @@ Expected readMemProfSchema(const unsigned char *&Buffer) { const unsigned char *Ptr = Buffer; const uint64_t NumSchemaIds = - endian::readNext(Ptr); + endian::readNext(Ptr); if (NumSchemaIds > static_cast(Meta::Size)) { return make_error(instrprof_error::malformed, "memprof schema invalid"); @@ -98,7 +102,8 @@ Expected readMemProfSchema(const unsigned char *&Buffer) { MemProfSchema Result; for (size_t I = 0; I < NumSchemaIds; I++) { - const uint64_t Tag = endian::readNext(Ptr); + const uint64_t Tag = + endian::readNext(Ptr); if (Tag >= static_cast(Meta::Size)) { return make_error(instrprof_error::malformed, "memprof schema invalid"); diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp index 0716ec53ce3f4..284e5ec634652 100644 --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -87,7 +87,7 @@ llvm::SmallVector readSegmentEntries(const char 
*Ptr) { using namespace support; const uint64_t NumItemsToRead = - endian::readNext(Ptr); + endian::readNext(Ptr); llvm::SmallVector Items; for (uint64_t I = 0; I < NumItemsToRead; I++) { Items.push_back(*reinterpret_cast( @@ -101,10 +101,11 @@ readMemInfoBlocks(const char *Ptr) { using namespace support; const uint64_t NumItemsToRead = - endian::readNext(Ptr); + endian::readNext(Ptr); llvm::SmallVector> Items; for (uint64_t I = 0; I < NumItemsToRead; I++) { - const uint64_t Id = endian::readNext(Ptr); + const uint64_t Id = + endian::readNext(Ptr); const MemInfoBlock MIB = *reinterpret_cast(Ptr); Items.push_back({Id, MIB}); // Only increment by size of MIB since readNext implicitly increments. @@ -117,16 +118,19 @@ CallStackMap readStackInfo(const char *Ptr) { using namespace support; const uint64_t NumItemsToRead = - endian::readNext(Ptr); + endian::readNext(Ptr); CallStackMap Items; for (uint64_t I = 0; I < NumItemsToRead; I++) { - const uint64_t StackId = endian::readNext(Ptr); - const uint64_t NumPCs = endian::readNext(Ptr); + const uint64_t StackId = + endian::readNext(Ptr); + const uint64_t NumPCs = + endian::readNext(Ptr); SmallVector CallStack; for (uint64_t J = 0; J < NumPCs; J++) { - CallStack.push_back(endian::readNext(Ptr)); + CallStack.push_back( + endian::readNext(Ptr)); } Items[StackId] = CallStack; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index e8e468ed7370c..256bdb833a0b1 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -502,7 +502,7 @@ ErrorOr SampleProfileReaderBinary::readUnencodedNumber() { } using namespace support; - T Val = endian::readNext(Data); + T Val = endian::readNext(Data); return Val; } @@ -531,8 +531,8 @@ SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) { if (!SR.data()) { assert(MD5NameMemStart); using namespace support; - uint64_t FID = endian::read(MD5NameMemStart + - (*Idx) * sizeof(uint64_t)); + 
uint64_t FID = endian::read( + MD5NameMemStart + (*Idx) * sizeof(uint64_t)); SR = MD5StringBuf.emplace_back(std::to_string(FID)); } if (RetIdx) diff --git a/llvm/unittests/DebugInfo/MSF/MappedBlockStreamTest.cpp b/llvm/unittests/DebugInfo/MSF/MappedBlockStreamTest.cpp index 3da19b90a28a9..d1f04e9b28a34 100644 --- a/llvm/unittests/DebugInfo/MSF/MappedBlockStreamTest.cpp +++ b/llvm/unittests/DebugInfo/MSF/MappedBlockStreamTest.cpp @@ -34,7 +34,9 @@ class DiscontiguousStream : public WritableBinaryStream { uint32_t block_size() const { return 1; } uint32_t block_count() const { return Blocks.size(); } - endianness getEndian() const override { return little; } + llvm::endianness getEndian() const override { + return llvm::endianness::little; + } Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef &Buffer) override { @@ -412,7 +414,7 @@ TEST(MappedBlockStreamTest, TestWriteContiguousStreamRef) { F.block_size(), F.layout(), F, F.Allocator); // First write "Test Str" into the source stream. - MutableBinaryByteStream SourceStream(SrcData, little); + MutableBinaryByteStream SourceStream(SrcData, llvm::endianness::little); BinaryStreamWriter SourceWriter(SourceStream); EXPECT_THAT_ERROR(SourceWriter.writeCString("Test Str"), Succeeded()); EXPECT_EQ(SrcDataBytes, std::vector( diff --git a/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp b/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp index 57f0dcf23db02..6d17332f49079 100644 --- a/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp +++ b/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp @@ -147,7 +147,7 @@ TEST(HashTableTest, Serialization) { } std::vector Buffer(Table.calculateSerializedLength()); - MutableBinaryByteStream Stream(Buffer, little); + MutableBinaryByteStream Stream(Buffer, llvm::endianness::little); BinaryStreamWriter Writer(Stream); EXPECT_THAT_ERROR(Table.commit(Writer), Succeeded()); // We should have written precisely the number of bytes we calculated earlier. 
@@ -251,7 +251,7 @@ TEST(HashTableTest, NonTrivialValueType) { } std::vector Buffer(Table.calculateSerializedLength()); - MutableBinaryByteStream Stream(Buffer, little); + MutableBinaryByteStream Stream(Buffer, llvm::endianness::little); BinaryStreamWriter Writer(Stream); EXPECT_THAT_ERROR(Table.commit(Writer), Succeeded()); // We should have written precisely the number of bytes we calculated earlier. diff --git a/llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp b/llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp index 2f7c061944064..1253f7c7ead7c 100644 --- a/llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp +++ b/llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp @@ -47,12 +47,12 @@ TEST(StringTableBuilderTest, Simple) { EXPECT_EQ(6U, Distinct.size()); std::vector Buffer(Builder.calculateSerializedSize()); - MutableBinaryByteStream OutStream(Buffer, little); + MutableBinaryByteStream OutStream(Buffer, llvm::endianness::little); BinaryStreamWriter Writer(OutStream); EXPECT_THAT_ERROR(Builder.commit(Writer), Succeeded()); // Reads the contents back. 
- BinaryByteStream InStream(Buffer, little); + BinaryByteStream InStream(Buffer, llvm::endianness::little); BinaryStreamReader Reader(InStream); PDBStringTable Table; EXPECT_THAT_ERROR(Table.reload(Reader), Succeeded()); diff --git a/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp b/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp index 3f581445a2d62..dcc8d3b237ff3 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp @@ -110,9 +110,11 @@ TEST(AArch32_Relocations, Thumb_Call_J1J2) { constexpr HalfWords ImmMask = FixupInfo::ImmMask; static std::array MemPresets{ - makeHalfWords({0xff, 0xf7, 0xfe, 0xef}), // common - makeHalfWords({0x00, 0x00, 0x00, 0x00}), // zeros - makeHalfWords({0xff, 0xff, 0xff, 0xff}), // ones + makeHalfWords( + {0xff, 0xf7, 0xfe, 0xef}), // common + makeHalfWords( + {0x00, 0x00, 0x00, 0x00}), // zeros + makeHalfWords({0xff, 0xff, 0xff, 0xff}), // ones }; auto EncodeDecode = [ImmMask](int64_t In, MutableHalfWords &Mem) { @@ -146,9 +148,11 @@ TEST(AArch32_Relocations, Thumb_Call_Bare) { constexpr HalfWords ImmMask = FixupInfo::ImmMask; static std::array MemPresets{ - makeHalfWords({0xff, 0xf7, 0xfe, 0xef}), // common - makeHalfWords({0x00, 0x00, 0x00, 0x00}), // zeros - makeHalfWords({0xff, 0xff, 0xff, 0xff}), // ones + makeHalfWords( + {0xff, 0xf7, 0xfe, 0xef}), // common + makeHalfWords( + {0x00, 0x00, 0x00, 0x00}), // zeros + makeHalfWords({0xff, 0xff, 0xff, 0xff}), // ones }; auto EncodeDecode = [ImmMask](int64_t In, MutableHalfWords &Mem) { @@ -217,9 +221,11 @@ TEST(AArch32_Relocations, Thumb_MovtAbs) { static std::array Registers{0, 5, 12}; static std::array MemPresets{ - makeHalfWords({0xff, 0xf7, 0xfe, 0xef}), // common - makeHalfWords({0x00, 0x00, 0x00, 0x00}), // zeros - makeHalfWords({0xff, 0xff, 0xff, 0xff}), // ones + makeHalfWords( + {0xff, 0xf7, 0xfe, 0xef}), // common + makeHalfWords( + {0x00, 0x00, 0x00, 0x00}), // zeros + 
makeHalfWords({0xff, 0xff, 0xff, 0xff}), // ones }; auto EncodeDecode = [ImmMask](uint32_t In, MutableHalfWords &Mem) { diff --git a/llvm/unittests/Support/BinaryStreamTest.cpp b/llvm/unittests/Support/BinaryStreamTest.cpp index 037aa596e7bba..70cd4036fb2a6 100644 --- a/llvm/unittests/Support/BinaryStreamTest.cpp +++ b/llvm/unittests/Support/BinaryStreamTest.cpp @@ -102,7 +102,8 @@ class BrokenStream : public WritableBinaryStream { BumpPtrAllocator Allocator; }; -constexpr endianness Endians[] = {big, little, native}; +constexpr llvm::endianness Endians[] = { + llvm::endianness::big, llvm::endianness::little, llvm::endianness::native}; constexpr uint32_t NumEndians = std::size(Endians); constexpr uint32_t NumStreams = 2 * NumEndians; @@ -931,7 +932,7 @@ TEST_F(BinaryStreamTest, BinaryItemStream) { Objects.push_back(BinaryItemStreamObject(Buffer)); } - BinaryItemStream ItemStream(big); + BinaryItemStream ItemStream(llvm::endianness::big); ItemStream.setItems(Objects); BinaryStreamReader Reader(ItemStream); diff --git a/llvm/unittests/Support/EndianStreamTest.cpp b/llvm/unittests/Support/EndianStreamTest.cpp index 1e800ff5570b9..2bab71c547b1e 100644 --- a/llvm/unittests/Support/EndianStreamTest.cpp +++ b/llvm/unittests/Support/EndianStreamTest.cpp @@ -20,7 +20,7 @@ TEST(EndianStream, WriteInt32LE) { { raw_svector_ostream OS(data); - endian::Writer LE(OS, little); + endian::Writer LE(OS, llvm::endianness::little); LE.write(static_cast(-1362446643)); } @@ -35,7 +35,7 @@ TEST(EndianStream, WriteInt32BE) { { raw_svector_ostream OS(data); - endian::Writer BE(OS, big); + endian::Writer BE(OS, llvm::endianness::big); BE.write(static_cast(-1362446643)); } @@ -51,7 +51,7 @@ TEST(EndianStream, WriteFloatLE) { { raw_svector_ostream OS(data); - endian::Writer LE(OS, little); + endian::Writer LE(OS, llvm::endianness::little); LE.write(12345.0f); } @@ -66,7 +66,7 @@ TEST(EndianStream, WriteFloatBE) { { raw_svector_ostream OS(data); - endian::Writer BE(OS, big); + endian::Writer 
BE(OS, llvm::endianness::big); BE.write(12345.0f); } @@ -81,7 +81,7 @@ TEST(EndianStream, WriteInt64LE) { { raw_svector_ostream OS(data); - endian::Writer LE(OS, little); + endian::Writer LE(OS, llvm::endianness::little); LE.write(static_cast(-136244664332342323)); } @@ -100,7 +100,7 @@ TEST(EndianStream, WriteInt64BE) { { raw_svector_ostream OS(data); - endian::Writer BE(OS, big); + endian::Writer BE(OS, llvm::endianness::big); BE.write(static_cast(-136244664332342323)); } @@ -119,7 +119,7 @@ TEST(EndianStream, WriteDoubleLE) { { raw_svector_ostream OS(data); - endian::Writer LE(OS, little); + endian::Writer LE(OS, llvm::endianness::little); LE.write(-2349214918.58107); } @@ -138,7 +138,7 @@ TEST(EndianStream, WriteDoubleBE) { { raw_svector_ostream OS(data); - endian::Writer BE(OS, big); + endian::Writer BE(OS, llvm::endianness::big); BE.write(-2349214918.58107); } @@ -157,7 +157,7 @@ TEST(EndianStream, WriteArrayLE) { { raw_svector_ostream OS(Data); - endian::Writer LE(OS, little); + endian::Writer LE(OS, llvm::endianness::little); LE.write({0x1234, 0x5678}); } @@ -172,7 +172,7 @@ TEST(EndianStream, WriteVectorLE) { { raw_svector_ostream OS(Data); - endian::Writer LE(OS, little); + endian::Writer LE(OS, llvm::endianness::little); std::vector Vec{0x1234, 0x5678}; LE.write(Vec); } @@ -188,7 +188,7 @@ TEST(EndianStream, WriteFloatArrayLE) { { raw_svector_ostream OS(Data); - endian::Writer LE(OS, little); + endian::Writer LE(OS, llvm::endianness::little); LE.write({12345.0f, 12346.0f}); } diff --git a/llvm/unittests/Support/EndianTest.cpp b/llvm/unittests/Support/EndianTest.cpp index b5e4a9c8d14a3..ab7dfc3800691 100644 --- a/llvm/unittests/Support/EndianTest.cpp +++ b/llvm/unittests/Support/EndianTest.cpp @@ -23,12 +23,17 @@ TEST(Endian, Read) { unsigned char bigval[] = {0x00, 0x01, 0x02, 0x03, 0x04}; unsigned char littleval[] = {0x00, 0x04, 0x03, 0x02, 0x01}; int32_t BigAsHost = 0x00010203; - EXPECT_EQ(BigAsHost, (endian::read(bigval))); + EXPECT_EQ(BigAsHost, + 
(endian::read(bigval))); int32_t LittleAsHost = 0x02030400; - EXPECT_EQ(LittleAsHost,(endian::read(littleval))); + EXPECT_EQ( + LittleAsHost, + (endian::read(littleval))); - EXPECT_EQ((endian::read(bigval + 1)), - (endian::read(littleval + 1))); + EXPECT_EQ( + (endian::read(bigval + 1)), + (endian::read(littleval + + 1))); } TEST(Endian, ReadBitAligned) { @@ -36,35 +41,43 @@ TEST(Endian, ReadBitAligned) { unsigned char littleval[] = {0x3f, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff}; unsigned char bigval[] = {0x00, 0x00, 0x00, 0x3f, 0xff, 0xff, 0xff, 0xc0}; EXPECT_EQ( - (endian::readAtBitAlignment(&littleval[0], 6)), + (endian::readAtBitAlignment( + &littleval[0], 6)), 0x0); - EXPECT_EQ((endian::readAtBitAlignment(&bigval[0], 6)), + EXPECT_EQ((endian::readAtBitAlignment( + &bigval[0], 6)), 0x0); // Test to make sure that signed right shift of 0xf0000000 is masked // properly. unsigned char littleval2[] = {0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00}; unsigned char bigval2[] = {0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; EXPECT_EQ( - (endian::readAtBitAlignment(&littleval2[0], 4)), + (endian::readAtBitAlignment( + &littleval2[0], 4)), 0x0f000000); - EXPECT_EQ((endian::readAtBitAlignment(&bigval2[0], 4)), + EXPECT_EQ((endian::readAtBitAlignment( + &bigval2[0], 4)), 0x0f000000); // Test to make sure left shift of start bit doesn't overflow. EXPECT_EQ( - (endian::readAtBitAlignment(&littleval2[0], 1)), + (endian::readAtBitAlignment( + &littleval2[0], 1)), 0x78000000); - EXPECT_EQ((endian::readAtBitAlignment(&bigval2[0], 1)), + EXPECT_EQ((endian::readAtBitAlignment( + &bigval2[0], 1)), 0x78000000); // Test to make sure 64-bit int doesn't overflow. 
unsigned char littleval3[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; unsigned char bigval3[] = {0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - EXPECT_EQ((endian::readAtBitAlignment( - &littleval3[0], 4)), - 0x0f00000000000000); EXPECT_EQ( - (endian::readAtBitAlignment(&bigval3[0], 4)), + (endian::readAtBitAlignment( + &littleval3[0], 4)), + 0x0f00000000000000); + EXPECT_EQ( + (endian::readAtBitAlignment( + &bigval3[0], 4)), 0x0f00000000000000); } @@ -72,8 +85,8 @@ TEST(Endian, WriteBitAligned) { // This test ensures that signed right shift of 0xffffaa is masked // properly. unsigned char bigval[8] = {0x00}; - endian::writeAtBitAlignment(bigval, (int)0xffffaaaa, - 4); + endian::writeAtBitAlignment( + bigval, (int)0xffffaaaa, 4); EXPECT_EQ(bigval[0], 0xff); EXPECT_EQ(bigval[1], 0xfa); EXPECT_EQ(bigval[2], 0xaa); @@ -84,8 +97,8 @@ TEST(Endian, WriteBitAligned) { EXPECT_EQ(bigval[7], 0x0f); unsigned char littleval[8] = {0x00}; - endian::writeAtBitAlignment(littleval, - (int)0xffffaaaa, 4); + endian::writeAtBitAlignment( + littleval, (int)0xffffaaaa, 4); EXPECT_EQ(littleval[0], 0xa0); EXPECT_EQ(littleval[1], 0xaa); EXPECT_EQ(littleval[2], 0xfa); @@ -98,8 +111,8 @@ TEST(Endian, WriteBitAligned) { // This test makes sure 1<<31 doesn't overflow. // Test to make sure left shift of start bit doesn't overflow. 
unsigned char bigval2[8] = {0x00}; - endian::writeAtBitAlignment(bigval2, (int)0xffffffff, - 1); + endian::writeAtBitAlignment( + bigval2, (int)0xffffffff, 1); EXPECT_EQ(bigval2[0], 0xff); EXPECT_EQ(bigval2[1], 0xff); EXPECT_EQ(bigval2[2], 0xff); @@ -110,8 +123,8 @@ TEST(Endian, WriteBitAligned) { EXPECT_EQ(bigval2[7], 0x01); unsigned char littleval2[8] = {0x00}; - endian::writeAtBitAlignment(littleval2, - (int)0xffffffff, 1); + endian::writeAtBitAlignment( + littleval2, (int)0xffffffff, 1); EXPECT_EQ(littleval2[0], 0xfe); EXPECT_EQ(littleval2[1], 0xff); EXPECT_EQ(littleval2[2], 0xff); @@ -123,7 +136,7 @@ TEST(Endian, WriteBitAligned) { // Test to make sure 64-bit int doesn't overflow. unsigned char bigval64[16] = {0x00}; - endian::writeAtBitAlignment( + endian::writeAtBitAlignment( bigval64, (int64_t)0xffffffffffffffff, 1); EXPECT_EQ(bigval64[0], 0xff); EXPECT_EQ(bigval64[1], 0xff); @@ -143,7 +156,7 @@ TEST(Endian, WriteBitAligned) { EXPECT_EQ(bigval64[15], 0x01); unsigned char littleval64[16] = {0x00}; - endian::writeAtBitAlignment( + endian::writeAtBitAlignment( littleval64, (int64_t)0xffffffffffffffff, 1); EXPECT_EQ(littleval64[0], 0xfe); EXPECT_EQ(littleval64[1], 0xff); @@ -165,23 +178,26 @@ TEST(Endian, WriteBitAligned) { TEST(Endian, Write) { unsigned char data[5]; - endian::write(data, -1362446643); + endian::write(data, -1362446643); EXPECT_EQ(data[0], 0xAE); EXPECT_EQ(data[1], 0xCA); EXPECT_EQ(data[2], 0xB6); EXPECT_EQ(data[3], 0xCD); - endian::write(data + 1, -1362446643); + endian::write(data + 1, + -1362446643); EXPECT_EQ(data[1], 0xAE); EXPECT_EQ(data[2], 0xCA); EXPECT_EQ(data[3], 0xB6); EXPECT_EQ(data[4], 0xCD); - endian::write(data, -1362446643); + endian::write(data, + -1362446643); EXPECT_EQ(data[0], 0xCD); EXPECT_EQ(data[1], 0xB6); EXPECT_EQ(data[2], 0xCA); EXPECT_EQ(data[3], 0xAE); - endian::write(data + 1, -1362446643); + endian::write(data + 1, + -1362446643); EXPECT_EQ(data[1], 0xCD); EXPECT_EQ(data[2], 0xB6); EXPECT_EQ(data[3], 0xCA); From 
93229c7bfd97429aa0ac55b45e618bdb013702b2 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Sat, 14 Oct 2023 10:52:34 +0400 Subject: [PATCH 128/720] [lldb] Add SBType::FindDirectNestedType() function (#68705) This patch adds a `SBType::FindDirectNestedType(name)` function which performs a non-recursive search in given class for a type with specified name. The intent is to perform a fast search in debug info, so that it can be used in formatters, and let them remain responsive. This is driven by my work on formatters for Clang and LLVM types. In particular, by [`PointerIntPairInfo::MaskAndShiftConstants`](https://github.com/llvm/llvm-project/blob/cde9f9df79805a0850310870d6dcc64004292727/llvm/include/llvm/ADT/PointerIntPair.h#L174C16-L174C16), which is required to extract pointer and integer from `PointerIntPair`. Related Discourse thread: https://discourse.llvm.org/t/traversing-member-types-of-a-type/72452 --- lldb/bindings/interface/SBTypeDocstrings.i | 8 +++++ lldb/include/lldb/API/SBType.h | 2 ++ lldb/include/lldb/Symbol/Type.h | 2 ++ lldb/include/lldb/Symbol/TypeSystem.h | 4 +++ lldb/source/API/SBType.cpp | 8 +++++ .../TypeSystem/Clang/TypeSystemClang.cpp | 7 +++++ .../TypeSystem/Clang/TypeSystemClang.h | 3 ++ lldb/source/Symbol/Type.cpp | 17 ++++++++++ lldb/source/Symbol/TypeSystem.cpp | 5 +++ lldb/test/API/python_api/type/TestTypeList.py | 31 +++++++++++++++++++ lldb/test/API/python_api/type/main.cpp | 5 +++ llvm/docs/ReleaseNotes.rst | 4 +++ 12 files changed, 96 insertions(+) diff --git a/lldb/bindings/interface/SBTypeDocstrings.i b/lldb/bindings/interface/SBTypeDocstrings.i index 96421a6aa2010..c49e9647ba046 100644 --- a/lldb/bindings/interface/SBTypeDocstrings.i +++ b/lldb/bindings/interface/SBTypeDocstrings.i @@ -720,6 +720,14 @@ SBType supports the eq/ne operator. For example,:: " ) lldb::SBType::GetTypeFlags; +%feature("docstring", + "Searches for a directly nested type that has the provided name. + + Returns the type if it was found. 
+ Returns invalid type if nothing was found. + " +) lldb::SBType::FindDirectNestedType; + %feature("docstring", "Represents a list of :py:class:`SBType` s. diff --git a/lldb/include/lldb/API/SBType.h b/lldb/include/lldb/API/SBType.h index 5962f0c50dee1..9980fe1218305 100644 --- a/lldb/include/lldb/API/SBType.h +++ b/lldb/include/lldb/API/SBType.h @@ -215,6 +215,8 @@ class SBType { bool GetDescription(lldb::SBStream &description, lldb::DescriptionLevel description_level); + lldb::SBType FindDirectNestedType(const char *name); + lldb::SBType &operator=(const lldb::SBType &rhs); bool operator==(lldb::SBType &rhs); diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h index d7bccae5f4135..c505262cd9eae 100644 --- a/lldb/include/lldb/Symbol/Type.h +++ b/lldb/include/lldb/Symbol/Type.h @@ -304,6 +304,8 @@ class TypeImpl { bool GetDescription(lldb_private::Stream &strm, lldb::DescriptionLevel description_level); + CompilerType FindDirectNestedType(llvm::StringRef name); + private: bool CheckModule(lldb::ModuleSP &module_sp) const; bool CheckExeModule(lldb::ModuleSP &module_sp) const; diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index 56acb1db1546a..5ac16be3347ff 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -142,6 +142,10 @@ class TypeSystem : public PluginInterface, virtual lldb::LanguageType DeclContextGetLanguage(void *opaque_decl_ctx) = 0; + /// Returns the direct parent context of specified type + virtual CompilerDeclContext + GetCompilerDeclContextForType(const CompilerType &type); + // Tests #ifndef NDEBUG /// Verify the integrity of the type to catch CompilerTypes that mix diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp index ee5b644742809..ac0e56303fae3 100644 --- a/lldb/source/API/SBType.cpp +++ b/lldb/source/API/SBType.cpp @@ -586,6 +586,14 @@ lldb::TemplateArgumentKind SBType::GetTemplateArgumentKind(uint32_t idx) { 
return eTemplateArgumentKindNull; } +SBType SBType::FindDirectNestedType(const char *name) { + LLDB_INSTRUMENT_VA(this, name); + + if (!IsValid()) + return SBType(); + return SBType(m_opaque_sp->FindDirectNestedType(name)); +} + SBTypeList::SBTypeList() : m_opaque_up(new TypeListImpl()) { LLDB_INSTRUMENT_VA(this); } diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index bcf4b62478068..f1353db2631dd 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -2637,6 +2637,13 @@ TypeSystemClang::GetDeclContextForType(const CompilerType &type) { return GetDeclContextForType(ClangUtil::GetQualType(type)); } +CompilerDeclContext +TypeSystemClang::GetCompilerDeclContextForType(const CompilerType &type) { + if (auto *decl_context = GetDeclContextForType(type)) + return CreateDeclContext(decl_context); + return CompilerDeclContext(); +} + /// Aggressively desugar the provided type, skipping past various kinds of /// syntactic sugar and other constructs one typically wants to ignore. 
/// The \p mask argument allows one to skip certain kinds of simplifications, diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 7805be92ec136..66e59ec985fb8 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -219,6 +219,9 @@ class TypeSystemClang : public TypeSystem { static clang::DeclContext *GetDeclContextForType(const CompilerType &type); + CompilerDeclContext + GetCompilerDeclContextForType(const CompilerType &type) override; + uint32_t GetPointerByteSize() override; clang::TranslationUnitDecl *GetTranslationUnitDecl() { diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp index 5f4c6303334a2..548300d570953 100644 --- a/lldb/source/Symbol/Type.cpp +++ b/lldb/source/Symbol/Type.cpp @@ -1040,6 +1040,23 @@ bool TypeImpl::GetDescription(lldb_private::Stream &strm, return true; } +CompilerType TypeImpl::FindDirectNestedType(llvm::StringRef name) { + if (name.empty()) + return CompilerType(); + auto type_system = GetTypeSystem(/*prefer_dynamic*/ false); + auto *symbol_file = type_system->GetSymbolFile(); + auto decl_context = type_system->GetCompilerDeclContextForType(m_static_type); + if (!decl_context.IsValid()) + return CompilerType(); + llvm::DenseSet searched_symbol_files; + TypeMap search_result; + symbol_file->FindTypes(ConstString(name), decl_context, /*max_matches*/ 1, + searched_symbol_files, search_result); + if (search_result.Empty()) + return CompilerType(); + return search_result.GetTypeAtIndex(0)->GetFullCompilerType(); +} + bool TypeMemberFunctionImpl::IsValid() { return m_type.IsValid() && m_kind != lldb::eMemberFunctionKindUnknown; } diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp index 24f2029305650..874f12573eca3 100644 --- a/lldb/source/Symbol/TypeSystem.cpp +++ b/lldb/source/Symbol/TypeSystem.cpp @@ -186,6 +186,11 @@ std::optional 
TypeSystem::ReportStatistics() { return std::nullopt; } +CompilerDeclContext +TypeSystem::GetCompilerDeclContextForType(const CompilerType &type) { + return CompilerDeclContext(); +} + #pragma mark TypeSystemMap TypeSystemMap::TypeSystemMap() : m_mutex(), m_map() {} diff --git a/lldb/test/API/python_api/type/TestTypeList.py b/lldb/test/API/python_api/type/TestTypeList.py index c2fcadc46ec15..c267defb58edf 100644 --- a/lldb/test/API/python_api/type/TestTypeList.py +++ b/lldb/test/API/python_api/type/TestTypeList.py @@ -119,6 +119,37 @@ def test(self): self.assertEqual(task_type, task_head_pointee_type) + # Check whether we can find a directly nested type by name + name_type = task_type.FindDirectNestedType("name") + self.assertTrue(name_type) + self.DebugSBType(name_type) + + enum_type = task_type.FindDirectNestedType("E") + self.assertTrue(enum_type) + self.DebugSBType(enum_type) + + union_type = task_type.FindDirectNestedType("U") + self.assertTrue(union_type) + self.DebugSBType(union_type) + + # Check that we don't find indirectly nested types + self.assertTrue(enum_type.size == 1) + + invalid_type = task_type.FindDirectNestedType("E2") + self.assertFalse(invalid_type) + + # Check that FindDirectNestedType handles types without DeclContext + # and other errorneous inputs + task_ptr_type = task_type.GetPointerType() + invalid_type = task_ptr_type.FindDirectNestedType("name") + self.assertFalse(invalid_type) + + invalid_type = task_type.FindDirectNestedType("") + self.assertFalse(invalid_type) + + invalid_type = task_type.FindDirectNestedType(None) + self.assertFalse(invalid_type) + # We'll now get the child member 'id' from 'task_head'. 
id = task_head.GetChildMemberWithName("id") self.DebugSBValue(id) diff --git a/lldb/test/API/python_api/type/main.cpp b/lldb/test/API/python_api/type/main.cpp index b1ef625283855..98de9707d8865 100644 --- a/lldb/test/API/python_api/type/main.cpp +++ b/lldb/test/API/python_api/type/main.cpp @@ -21,7 +21,12 @@ class Task { } my_type_is_nameless; struct name { int x; + enum E : int {} e; + enum E2 {} e2; } my_type_is_named; + enum E : unsigned char {} e; + union U { + } u; Task(int i, Task *n): id(i), next(n), diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 3453c7e61ae4a..467b4b5320ad9 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -191,6 +191,10 @@ Changes to LLDB * Methods in SBHostOS related to threads have had their implementations removed. These methods will return a value indicating failure. +* ``SBType::FindDirectNestedType`` function is added. It's useful + for formatters to quickly find directly nested type when it's known + where to search for it, avoiding more expensive global search via + ``SBTarget::FindFirstType``. Changes to Sanitizers --------------------- From a653749acab8d5cb84e7f15cccc97e76ebe8c84b Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Sat, 14 Oct 2023 09:17:35 +0200 Subject: [PATCH 129/720] [clang][Interp] Implement compound assign operators on bitfields (#67306) --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 7 ++++- clang/test/AST/Interp/bitfields.cpp | 38 ++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 2b745d6a15098..71aac8c6245c5 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -1170,8 +1170,13 @@ bool ByteCodeExprGen::VisitCompoundAssignOperator( } // And store the result in LHS. 
- if (DiscardResult) + if (DiscardResult) { + if (LHS->refersToBitField()) + return this->emitStoreBitFieldPop(*ResultT, E); return this->emitStorePop(*ResultT, E); + } + if (LHS->refersToBitField()) + return this->emitStoreBitField(*ResultT, E); return this->emitStore(*ResultT, E); } diff --git a/clang/test/AST/Interp/bitfields.cpp b/clang/test/AST/Interp/bitfields.cpp index e078704fce51f..9a144e2f0d961 100644 --- a/clang/test/AST/Interp/bitfields.cpp +++ b/clang/test/AST/Interp/bitfields.cpp @@ -31,8 +31,6 @@ namespace Basic { return a.a = 10; } static_assert(storeA2() == 2, ""); - - // TODO: +=, -=, etc. operators. } namespace Overflow { @@ -45,3 +43,39 @@ namespace Overflow { static_assert(f() == 3, ""); } + +namespace Compound { + struct A { + unsigned int a : 2; + constexpr A() : a(0) {} + constexpr A(int a) : a(a) {} + }; + + constexpr unsigned add() { + A a; + a.a += 10; + return a.a; + } + static_assert(add() == 2, ""); + + constexpr unsigned sub() { + A a; + a.a -= 10; + return a.a; + } + static_assert(sub() == 2, ""); + + constexpr unsigned mul() { + A a(1); + a.a *= 5; + return a.a; + } + static_assert(mul() == 1, ""); + + constexpr unsigned div() { + A a(2); + a.a /= 2; + return a.a; + } + static_assert(div() == 1, ""); +} From 7060422265902f11a13f785a1a0ba246eff96114 Mon Sep 17 00:00:00 2001 From: Aviad Cohen Date: Sat, 14 Oct 2023 10:40:45 +0300 Subject: [PATCH 130/720] [mlir][Linalg]: Optimize linalg generic in transform::PromoteOp to avoid unnecessary copies (#68555) If the operands are not used in the payload of linalg generic operations, there is no need to copy them before the operation. 
--- mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp | 10 ++++++++++ mlir/test/Dialect/GPU/promotion.mlir | 1 + mlir/test/Dialect/Linalg/promote.mlir | 1 - 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index ad399f57f72cb..a131f30976661 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -28,6 +28,7 @@ #include "mlir/Transforms/FoldUtils.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -142,6 +143,8 @@ struct LinalgOpInstancePromotionOptions { const LinalgPromotionOptions &options); /// SubViews to promote. MapVector subViews; + /// Subviews operand numbers to copy in using copyInFn. + llvm::SmallSet operandsNumbersToCopyIn; /// True if the full view should be used for the promoted buffer. DenseMap useFullTileBuffers; @@ -174,6 +177,11 @@ LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( Operation *op = opOperand.get().getDefiningOp(); if (auto sv = dyn_cast_or_null(op)) { subViews[operandNumber] = sv; + // In case of linalg generic, copy in only if subview is used in linalg + // payload. 
+ if (!isa(linalgOp) || + linalgOp.payloadUsesValueFromOperand(&opOperand)) + operandsNumbersToCopyIn.insert(operandNumber); useFullTileBuffers[sv] = vUseFullTileBuffers[operandNumber]; } } @@ -324,6 +332,8 @@ promoteSubViews(ImplicitLocOpBuilder &b, auto info = promotionInfoMap.find(v.first); if (info == promotionInfoMap.end()) continue; + if (options.operandsNumbersToCopyIn.count(v.first) == 0) + continue; if (failed(options.copyInFn( b, cast(v.second.getDefiningOp()), info->second.partialLocalView))) diff --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir index b4668b5678894..2da1be597753b 100644 --- a/mlir/test/Dialect/GPU/promotion.mlir +++ b/mlir/test/Dialect/GPU/promotion.mlir @@ -1,3 +1,4 @@ + // RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='builtin.module(gpu.module(gpu.func(test-gpu-memory-promotion)))' -split-input-file %s | FileCheck %s gpu.module @foo { diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir index 5cd56db7fd2d8..31b29c0e105d9 100644 --- a/mlir/test/Dialect/Linalg/promote.mlir +++ b/mlir/test/Dialect/Linalg/promote.mlir @@ -353,7 +353,6 @@ func.func @linalg_generic_update_all_function_inputs_outputs(%arg0: memref<3x4xf // CHECK: %[[VAL_62:.*]] = memref.subview %[[VAL_61]][0, 0] {{\[}}%[[VAL_52]], %[[VAL_55]]] [1, 1] : memref> to memref, #gpu.address_space> // CHECK: memref.copy %[[VAL_3]], %[[VAL_24]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref, #gpu.address_space> // CHECK: memref.copy %[[VAL_4]], %[[VAL_43]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref, #gpu.address_space> - // CHECK: memref.copy %[[VAL_5]], %[[VAL_62]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref, #gpu.address_space> // CHECK: linalg.generic {doc = "", indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"], library_call = ""} ins(%[[VAL_24]], %[[VAL_43]] : memref, #gpu.address_space>, memref, #gpu.address_space>) outs(%[[VAL_62]] : memref, 
#gpu.address_space>) { // CHECK: ^bb0(%[[VAL_63:.*]]: f32, %[[VAL_64:.*]]: f32, %[[VAL_65:.*]]: f32): // CHECK: %[[VAL_66:.*]] = arith.addf %[[VAL_63]], %[[VAL_64]] : f32 From 769bc11f684d376bff03649da41296a4fc710161 Mon Sep 17 00:00:00 2001 From: Bill Wendling <5993918+bwendling@users.noreply.github.com> Date: Sat, 14 Oct 2023 04:18:02 -0700 Subject: [PATCH 131/720] [Clang] Implement the 'counted_by' attribute (#68750) The 'counted_by' attribute is used on flexible array members. The argument for the attribute is the name of the field member in the same structure holding the count of elements in the flexible array. This information can be used to improve the results of the array bound sanitizer and the '__builtin_dynamic_object_size' builtin. This example specifies that the flexible array member 'array' has the number of elements allocated for it in 'count': struct bar; struct foo { size_t count; /* ... */ struct bar *array[] __attribute__((counted_by(count))); }; This establishes a relationship between 'array' and 'count', specifically that 'p->array' must have *at least* 'p->count' number of elements available. It's the user's responsibility to ensure that this relationship is maintained through changes to the structure. In the following, the allocated array erroneously has fewer elements than what's specified by 'p->count'.
This would result in an out-of-bounds access not being detected: struct foo *p; void foo_alloc(size_t count) { p = malloc(MAX(sizeof(struct foo), offsetof(struct foo, array[0]) + count * sizeof(struct bar *))); p->count = count + 42; } The next example updates 'p->count', breaking the relationship requirement that 'p->array' must have at least 'p->count' number of elements available: struct foo *p; void foo_alloc(size_t count) { p = malloc(MAX(sizeof(struct foo), offsetof(struct foo, array[0]) + count * sizeof(struct bar *))); p->count = count + 42; } void use_foo(int index) { p->count += 42; p->array[index] = 0; /* The sanitizer cannot properly check this access */ } Reviewed By: nickdesaulniers, aaron.ballman Differential Revision: https://reviews.llvm.org/D148381 --- clang/docs/ReleaseNotes.rst | 5 + clang/include/clang/AST/Decl.h | 24 ++ clang/include/clang/AST/DeclBase.h | 10 + clang/include/clang/Basic/Attr.td | 18 ++ clang/include/clang/Basic/AttrDocs.td | 66 +++++ .../clang/Basic/DiagnosticSemaKinds.td | 11 + clang/include/clang/Sema/Sema.h | 3 + clang/include/clang/Sema/TypoCorrection.h | 12 +- clang/lib/AST/ASTImporter.cpp | 13 + clang/lib/AST/DeclBase.cpp | 77 +++++- clang/lib/AST/Expr.cpp | 83 +------ clang/lib/CodeGen/CGBuiltin.cpp | 51 ++++ clang/lib/CodeGen/CGExpr.cpp | 64 ++++- clang/lib/CodeGen/CodeGenFunction.h | 6 + clang/lib/Sema/SemaDecl.cpp | 14 ++ clang/lib/Sema/SemaDeclAttr.cpp | 90 +++++++ clang/lib/Sema/SemaExpr.cpp | 16 +- clang/test/CodeGen/attr-counted-by.c | 227 ++++++++++++++++++ clang/test/CodeGen/bounds-checking.c | 10 +- ...a-attribute-supported-attributes-list.test | 1 + clang/test/Sema/attr-counted-by.c | 50 ++++ 21 files changed, 762 insertions(+), 89 deletions(-) create mode 100644 clang/test/CodeGen/attr-counted-by.c create mode 100644 clang/test/Sema/attr-counted-by.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2d918967e7f0b..1eebf5ea6b3e3 100644 --- a/clang/docs/ReleaseNotes.rst +++ 
b/clang/docs/ReleaseNotes.rst @@ -157,6 +157,11 @@ C Language Changes - ``structs``, ``unions``, and ``arrays`` that are const may now be used as constant expressions. This change is more consistent with the behavior of GCC. +- Clang now supports the C-only attribute ``counted_by``. When applied to a + struct's flexible array member, it points to the struct field that holds the + number of elements in the flexible array member. This information can improve + the results of the array bound sanitizer and the + ``__builtin_dynamic_object_size`` builtin. C23 Feature Support ^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 02e30e24c8be4..7f076cc77ea82 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -4302,6 +4302,30 @@ class RecordDecl : public TagDecl { return field_begin() == field_end(); } + FieldDecl *getLastField() { + FieldDecl *FD = nullptr; + for (FieldDecl *Field : fields()) + FD = Field; + return FD; + } + const FieldDecl *getLastField() const { + return const_cast(this)->getLastField(); + } + + template + const FieldDecl *findFieldIf(Functor &Pred) const { + for (const Decl *D : decls()) { + if (const auto *FD = dyn_cast(D); FD && Pred(FD)) + return FD; + + if (const auto *RD = dyn_cast(D)) + if (const FieldDecl *FD = RD->findFieldIf(Pred)) + return FD; + } + + return nullptr; + } + /// Note that the definition of this type is now complete. 
virtual void completeDefinition(); diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 12137387b676a..d383e46e22e16 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -18,6 +18,7 @@ #include "clang/AST/DeclarationName.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" +#include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" #include "llvm/ADT/ArrayRef.h" @@ -477,6 +478,15 @@ class alignas(8) Decl { // Return true if this is a FileContext Decl. bool isFileContextDecl() const; + /// Whether it resembles a flexible array member. This is a static member + /// because we want to be able to call it with a nullptr. That allows us to + /// perform non-Decl specific checks based on the object's type and strict + /// flex array level. + static bool isFlexibleArrayMemberLike( + ASTContext &Context, const Decl *D, QualType Ty, + LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, + bool IgnoreTemplateOrMacroSubstitution); + ASTContext &getASTContext() const LLVM_READONLY; /// Helper to get the language options from the ASTContext. diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 7a6ec77ae84b1..5c9eb7b8a9810 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -4246,3 +4246,21 @@ def AvailableOnlyInDefaultEvalMethod : InheritableAttr { let Subjects = SubjectList<[TypedefName], ErrorDiag>; let Documentation = [Undocumented]; } + +def CountedBy : InheritableAttr { + let Spellings = [Clang<"counted_by">]; + let Subjects = SubjectList<[Field]>; + let Args = [IdentifierArgument<"CountedByField">]; + let Documentation = [CountedByDocs]; + let LangOpts = [COnly]; + // FIXME: This is ugly. Let using a DeclArgument would be nice, but a Decl + // isn't yet available due to the fact that we're still parsing the + // structure. 
Maybe that code could be changed sometime in the future. + code AdditionalMembers = [{ + private: + SourceRange CountedByFieldLoc; + public: + SourceRange getCountedByFieldLoc() const { return CountedByFieldLoc; } + void setCountedByFieldLoc(SourceRange Loc) { CountedByFieldLoc = Loc; } + }]; +} diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 8d928dcc146b2..9f9991bdae361 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -7275,3 +7275,69 @@ relative ordering of values is important. For example: attribute, they default to the value ``65535``. }]; } + +def CountedByDocs : Documentation { + let Category = DocCatField; + let Content = [{ +Clang supports the ``counted_by`` attribute on the flexible array member of a +structure in C. The argument for the attribute is the name of a field member in +the same structure holding the count of elements in the flexible array. This +information can be used to improve the results of the array bound sanitizer and +the ``__builtin_dynamic_object_size`` builtin. + +For example, the following code: + +.. code-block:: c + + struct bar; + + struct foo { + size_t count; + char other; + struct bar *array[] __attribute__((counted_by(count))); + }; + +specifies that the flexible array member ``array`` has the number of elements +allocated for it stored in ``count``. This establishes a relationship between +``array`` and ``count``. Specifically, ``p->array`` must have at least +``p->count`` number of elements available. It's the user's responsibility to +ensure that this relationship is maintained through changes to the structure. + +In the following example, the allocated array erroneously has fewer elements +than what's specified by ``p->count``. This would result in an out-of-bounds +access not being detected. + +.. 
code-block:: c + + #define SIZE_INCR 42 + + struct foo *p; + + void foo_alloc(size_t count) { + p = malloc(MAX(sizeof(struct foo), + offsetof(struct foo, array[0]) + count * sizeof(struct bar *))); + p->count = count + SIZE_INCR; + } + +The next example updates ``p->count``, breaking the relationship requirement +that ``p->array`` must have at least ``p->count`` number of elements available: + +.. code-block:: c + + #define SIZE_INCR 42 + + struct foo *p; + + void foo_alloc(size_t count) { + p = malloc(MAX(sizeof(struct foo), + offsetof(struct foo, array[0]) + count * sizeof(struct bar *))); + p->count = count; + } + + void use_foo(int index) { + p->count += SIZE_INCR + 1; /* 'count' is now larger than the number of elements of 'array'. */ + p->array[index] = 0; /* the sanitizer can't properly check if this is an out-of-bounds access. */ + } + + }]; +} diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c1a6e3831127e..e85cd4d1a1ddc 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6389,6 +6389,17 @@ def warn_superclass_variable_sized_type_not_at_end : Warning< "field %0 can overwrite instance variable %1 with variable sized type %2" " in superclass %3">, InGroup; +def err_counted_by_attr_not_on_flexible_array_member : Error< + "'counted_by' only applies to flexible array members">; +def err_counted_by_attr_refers_to_flexible_array : Error< + "'counted_by' cannot refer to the flexible array %0">; +def err_counted_by_must_be_in_structure : Error< + "field %0 in 'counted_by' not inside structure">; +def err_flexible_array_counted_by_attr_field_not_integer : Error< + "field %0 in 'counted_by' must be a non-boolean integer type">; +def note_flexible_array_counted_by_attr_field : Note< + "field %0 declared here">; + let CategoryName = "ARC Semantic Issue" in { // ARC-mode diagnostics. 
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 2ebd21090ae4e..250ac33680cdb 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4795,6 +4795,8 @@ class Sema final { bool CheckAlwaysInlineAttr(const Stmt *OrigSt, const Stmt *CurSt, const AttributeCommonInfo &A); + bool CheckCountedByAttr(Scope *Scope, const FieldDecl *FD); + /// Adjust the calling convention of a method to be the ABI default if it /// wasn't specified explicitly. This handles method types formed from /// function type typedefs and typename template arguments. @@ -5638,6 +5640,7 @@ class Sema final { CorrectionCandidateCallback &CCC, TemplateArgumentListInfo *ExplicitTemplateArgs = nullptr, ArrayRef Args = std::nullopt, + DeclContext *LookupCtx = nullptr, TypoExpr **Out = nullptr); DeclResult LookupIvarInObjCMethod(LookupResult &Lookup, Scope *S, diff --git a/clang/include/clang/Sema/TypoCorrection.h b/clang/include/clang/Sema/TypoCorrection.h index e0f8d152dbe55..09de164297e7b 100644 --- a/clang/include/clang/Sema/TypoCorrection.h +++ b/clang/include/clang/Sema/TypoCorrection.h @@ -282,7 +282,7 @@ class CorrectionCandidateCallback { public: static const unsigned InvalidDistance = TypoCorrection::InvalidDistance; - explicit CorrectionCandidateCallback(IdentifierInfo *Typo = nullptr, + explicit CorrectionCandidateCallback(const IdentifierInfo *Typo = nullptr, NestedNameSpecifier *TypoNNS = nullptr) : Typo(Typo), TypoNNS(TypoNNS) {} @@ -319,7 +319,7 @@ class CorrectionCandidateCallback { /// this method. virtual std::unique_ptr clone() = 0; - void setTypoName(IdentifierInfo *II) { Typo = II; } + void setTypoName(const IdentifierInfo *II) { Typo = II; } void setTypoNNS(NestedNameSpecifier *NNS) { TypoNNS = NNS; } // Flags for context-dependent keywords. 
WantFunctionLikeCasts is only @@ -345,13 +345,13 @@ class CorrectionCandidateCallback { candidate.getCorrectionSpecifier() == TypoNNS; } - IdentifierInfo *Typo; + const IdentifierInfo *Typo; NestedNameSpecifier *TypoNNS; }; class DefaultFilterCCC final : public CorrectionCandidateCallback { public: - explicit DefaultFilterCCC(IdentifierInfo *Typo = nullptr, + explicit DefaultFilterCCC(const IdentifierInfo *Typo = nullptr, NestedNameSpecifier *TypoNNS = nullptr) : CorrectionCandidateCallback(Typo, TypoNNS) {} @@ -365,6 +365,10 @@ class DefaultFilterCCC final : public CorrectionCandidateCallback { template class DeclFilterCCC final : public CorrectionCandidateCallback { public: + explicit DeclFilterCCC(const IdentifierInfo *Typo = nullptr, + NestedNameSpecifier *TypoNNS = nullptr) + : CorrectionCandidateCallback(Typo, TypoNNS) {} + bool ValidateCandidate(const TypoCorrection &candidate) override { return candidate.getCorrectionDeclAs(); } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 72e70427161bb..3adbabdb7fb87 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -8978,6 +8978,10 @@ class AttrImporter { public: AttrImporter(ASTImporter &I) : Importer(I), NImporter(I) {} + // Useful for accessing the imported attribute. + template T *castAttrAs() { return cast(ToAttr); } + template const T *castAttrAs() const { return cast(ToAttr); } + // Create an "importer" for an attribute parameter. // Result of the 'value()' of that object is to be passed to the function // 'importAttr', in the order that is expected by the attribute class. 
@@ -9184,6 +9188,15 @@ Expected ASTImporter::Import(const Attr *FromAttr) { From->args_size()); break; } + case attr::CountedBy: { + AI.cloneAttr(FromAttr); + const auto *CBA = cast(FromAttr); + Expected SR = Import(CBA->getCountedByFieldLoc()).get(); + if (!SR) + return SR.takeError(); + AI.castAttrAs()->setCountedByFieldLoc(SR.get()); + break; + } default: { // The default branch works for attributes that have no arguments to import. diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 3804f1a5b49d3..a3847a73faf81 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -29,7 +29,6 @@ #include "clang/AST/Type.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" -#include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/PartialDiagnostic.h" @@ -411,6 +410,82 @@ bool Decl::isFileContextDecl() const { return DC && DC->isFileContext(); } +bool Decl::isFlexibleArrayMemberLike( + ASTContext &Ctx, const Decl *D, QualType Ty, + LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, + bool IgnoreTemplateOrMacroSubstitution) { + // For compatibility with existing code, we treat arrays of length 0 or + // 1 as flexible array members. + const auto *CAT = Ctx.getAsConstantArrayType(Ty); + if (CAT) { + using FAMKind = LangOptions::StrictFlexArraysLevelKind; + + llvm::APInt Size = CAT->getSize(); + FAMKind StrictFlexArraysLevel = + Ctx.getLangOpts().getStrictFlexArraysLevel(); + + if (StrictFlexArraysLevel == FAMKind::IncompleteOnly) + return false; + + // GCC extension, only allowed to represent a FAM. 
+ if (Size.isZero()) + return true; + + if (StrictFlexArraysLevel == FAMKind::ZeroOrIncomplete && Size.uge(1)) + return false; + + if (StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete && Size.uge(2)) + return false; + } else if (!Ctx.getAsIncompleteArrayType(Ty)) { + return false; + } + + if (const auto *OID = dyn_cast_if_present(D)) + return OID->getNextIvar() == nullptr; + + const auto *FD = dyn_cast_if_present(D); + if (!FD) + return false; + + if (CAT) { + // GCC treats an array memeber of a union as an FAM if the size is one or + // zero. + llvm::APInt Size = CAT->getSize(); + if (FD->getParent()->isUnion() && (Size.isZero() || Size.isOne())) + return true; + } + + // Don't consider sizes resulting from macro expansions or template argument + // substitution to form C89 tail-padded arrays. + if (IgnoreTemplateOrMacroSubstitution) { + TypeSourceInfo *TInfo = FD->getTypeSourceInfo(); + while (TInfo) { + TypeLoc TL = TInfo->getTypeLoc(); + + // Look through typedefs. + if (TypedefTypeLoc TTL = TL.getAsAdjusted()) { + const TypedefNameDecl *TDL = TTL.getTypedefNameDecl(); + TInfo = TDL->getTypeSourceInfo(); + continue; + } + + if (auto CTL = TL.getAs()) { + if (const Expr *SizeExpr = + dyn_cast_if_present(CTL.getSizeExpr()); + !SizeExpr || SizeExpr->getExprLoc().isMacroID()) + return false; + } + + break; + } + } + + // Test that the field is the last in the structure. 
+ RecordDecl::field_iterator FI( + DeclContext::decl_iterator(const_cast(FD))); + return ++FI == FD->getParent()->field_end(); +} + TranslationUnitDecl *Decl::getTranslationUnitDecl() { if (auto *TUD = dyn_cast(this)) return TUD; diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 4bfc4f082cd6a..5d3b510df1ef9 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -205,85 +205,22 @@ bool Expr::isKnownToHaveBooleanValue(bool Semantic) const { } bool Expr::isFlexibleArrayMemberLike( - ASTContext &Context, + ASTContext &Ctx, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution) const { - - // For compatibility with existing code, we treat arrays of length 0 or - // 1 as flexible array members. - const auto *CAT = Context.getAsConstantArrayType(getType()); - if (CAT) { - llvm::APInt Size = CAT->getSize(); - - using FAMKind = LangOptions::StrictFlexArraysLevelKind; - - if (StrictFlexArraysLevel == FAMKind::IncompleteOnly) - return false; - - // GCC extension, only allowed to represent a FAM. - if (Size == 0) - return true; - - if (StrictFlexArraysLevel == FAMKind::ZeroOrIncomplete && Size.uge(1)) - return false; - - if (StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete && Size.uge(2)) - return false; - } else if (!Context.getAsIncompleteArrayType(getType())) - return false; - const Expr *E = IgnoreParens(); + const Decl *D = nullptr; - const NamedDecl *ND = nullptr; - if (const auto *DRE = dyn_cast(E)) - ND = DRE->getDecl(); - else if (const auto *ME = dyn_cast(E)) - ND = ME->getMemberDecl(); + if (const auto *ME = dyn_cast(E)) + D = ME->getMemberDecl(); + else if (const auto *DRE = dyn_cast(E)) + D = DRE->getDecl(); else if (const auto *IRE = dyn_cast(E)) - return IRE->getDecl()->getNextIvar() == nullptr; - - if (!ND) - return false; + D = IRE->getDecl(); - // A flexible array member must be the last member in the class. 
- // FIXME: If the base type of the member expr is not FD->getParent(), - // this should not be treated as a flexible array member access. - if (const auto *FD = dyn_cast(ND)) { - // GCC treats an array memeber of a union as an FAM if the size is one or - // zero. - if (CAT) { - llvm::APInt Size = CAT->getSize(); - if (FD->getParent()->isUnion() && (Size.isZero() || Size.isOne())) - return true; - } - - // Don't consider sizes resulting from macro expansions or template argument - // substitution to form C89 tail-padded arrays. - if (IgnoreTemplateOrMacroSubstitution) { - TypeSourceInfo *TInfo = FD->getTypeSourceInfo(); - while (TInfo) { - TypeLoc TL = TInfo->getTypeLoc(); - // Look through typedefs. - if (TypedefTypeLoc TTL = TL.getAsAdjusted()) { - const TypedefNameDecl *TDL = TTL.getTypedefNameDecl(); - TInfo = TDL->getTypeSourceInfo(); - continue; - } - if (ConstantArrayTypeLoc CTL = TL.getAs()) { - const Expr *SizeExpr = dyn_cast(CTL.getSizeExpr()); - if (!SizeExpr || SizeExpr->getExprLoc().isMacroID()) - return false; - } - break; - } - } - - RecordDecl::field_iterator FI( - DeclContext::decl_iterator(const_cast(FD))); - return ++FI == FD->getParent()->field_end(); - } - - return false; + return Decl::isFlexibleArrayMemberLike(Ctx, D, E->getType(), + StrictFlexArraysLevel, + IgnoreTemplateOrMacroSubstitution); } const ValueDecl * diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c05e69eff4370..4d86e8a769846 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -853,6 +853,57 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, } } + if (IsDynamic) { + LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = + getLangOpts().getStrictFlexArraysLevel(); + const Expr *Base = E->IgnoreParenImpCasts(); + + if (FieldDecl *FD = FindCountedByField(Base, StrictFlexArraysLevel)) { + const auto *ME = dyn_cast(Base); + llvm::Value *ObjectSize = nullptr; + + if (!ME) { + const auto 
*DRE = dyn_cast(Base); + ValueDecl *VD = nullptr; + + ObjectSize = ConstantInt::get( + ResType, + getContext().getTypeSize(DRE->getType()->getPointeeType()) / 8, + true); + + if (auto *RD = DRE->getType()->getPointeeType()->getAsRecordDecl()) + VD = RD->getLastField(); + + Expr *ICE = ImplicitCastExpr::Create( + getContext(), DRE->getType(), CK_LValueToRValue, + const_cast(cast(DRE)), nullptr, VK_PRValue, + FPOptionsOverride()); + ME = MemberExpr::CreateImplicit(getContext(), ICE, true, VD, + VD->getType(), VK_LValue, OK_Ordinary); + } + + // At this point, we know that \p ME is a flexible array member. + const auto *ArrayTy = getContext().getAsArrayType(ME->getType()); + unsigned Size = getContext().getTypeSize(ArrayTy->getElementType()); + + llvm::Value *CountField = + EmitAnyExprToTemp(MemberExpr::CreateImplicit( + getContext(), const_cast(ME->getBase()), + ME->isArrow(), FD, FD->getType(), VK_LValue, + OK_Ordinary)) + .getScalarVal(); + + llvm::Value *Mul = Builder.CreateMul( + CountField, llvm::ConstantInt::get(CountField->getType(), Size / 8)); + Mul = Builder.CreateZExtOrTrunc(Mul, ResType); + + if (ObjectSize) + return Builder.CreateAdd(ObjectSize, Mul); + + return Mul; + } + } + // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't // evaluate E for side-effects. In either case, we shouldn't lower to // @llvm.objectsize. 
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 1b6a2c1fc4996..54a1d300a9ac7 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -30,6 +30,7 @@ #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" @@ -931,16 +932,31 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, if (CE->getCastKind() == CK_ArrayToPointerDecay && !CE->getSubExpr()->isFlexibleArrayMemberLike(CGF.getContext(), StrictFlexArraysLevel)) { + CodeGenFunction::SanitizerScope SanScope(&CGF); + IndexedType = CE->getSubExpr()->getType(); const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe(); if (const auto *CAT = dyn_cast(AT)) return CGF.Builder.getInt(CAT->getSize()); - else if (const auto *VAT = dyn_cast(AT)) + + if (const auto *VAT = dyn_cast(AT)) return CGF.getVLASize(VAT).NumElts; // Ignore pass_object_size here. It's not applicable on decayed pointers. 
} + + if (FieldDecl *FD = CGF.FindCountedByField(Base, StrictFlexArraysLevel)) { + const auto *ME = dyn_cast(CE->getSubExpr()); + IndexedType = Base->getType(); + return CGF + .EmitAnyExprToTemp(MemberExpr::CreateImplicit( + CGF.getContext(), const_cast(ME->getBase()), + ME->isArrow(), FD, FD->getType(), VK_LValue, OK_Ordinary)) + .getScalarVal(); + } } + CodeGenFunction::SanitizerScope SanScope(&CGF); + QualType EltTy{Base->getType()->getPointeeOrArrayElementType(), 0}; if (llvm::Value *POS = CGF.LoadPassedObjectSize(Base, EltTy)) { IndexedType = Base->getType(); @@ -950,13 +966,53 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, return nullptr; } +FieldDecl *CodeGenFunction::FindCountedByField( + const Expr *Base, + LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel) { + const ValueDecl *VD = nullptr; + + Base = Base->IgnoreParenImpCasts(); + + if (const auto *ME = dyn_cast(Base)) { + VD = dyn_cast(ME->getMemberDecl()); + } else if (const auto *DRE = dyn_cast(Base)) { + // Pointing to the full structure. + VD = dyn_cast(DRE->getDecl()); + + QualType Ty = VD->getType(); + if (Ty->isPointerType()) + Ty = Ty->getPointeeType(); + + if (const auto *RD = Ty->getAsRecordDecl()) + VD = RD->getLastField(); + } else if (const auto *CE = dyn_cast(Base)) { + if (const auto *ME = dyn_cast(CE->getSubExpr())) + VD = dyn_cast(ME->getMemberDecl()); + } + + const auto *FD = dyn_cast_if_present(VD); + if (!FD || !FD->getParent() || + !Decl::isFlexibleArrayMemberLike(getContext(), FD, FD->getType(), + StrictFlexArraysLevel, true)) + return nullptr; + + const auto *CBA = FD->getAttr(); + if (!CBA) + return nullptr; + + StringRef FieldName = CBA->getCountedByField()->getName(); + auto It = + llvm::find_if(FD->getParent()->fields(), [&](const FieldDecl *Field) { + return FieldName == Field->getName(); + }); + return It != FD->getParent()->field_end() ? 
*It : nullptr; +} + void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, QualType IndexType, bool Accessed) { assert(SanOpts.has(SanitizerKind::ArrayBounds) && "should not be called unless adding bounds checks"); - SanitizerScope SanScope(this); - const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = getLangOpts().getStrictFlexArraysLevel(); @@ -966,6 +1022,8 @@ void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, if (!Bound) return; + SanitizerScope SanScope(this); + bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType(); llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned); llvm::Value *BoundVal = Builder.CreateIntCast(Bound, SizeTy, false); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 471aad94e10c6..d5336382a2b9c 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3022,6 +3022,12 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, QualType IndexType, bool Accessed); + /// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns + /// \p nullptr if either the attribute or the field doesn't exist. 
+ FieldDecl *FindCountedByField( + const Expr *Base, + LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel); + llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre); ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index f249d41bc9bfb..e3387b5b669c6 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -19447,6 +19447,20 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl, CDecl->setIvarRBraceLoc(RBrac); } } + + // Check the "counted_by" attribute to ensure that the count field exists in + // the struct. Make sure we're performing this check on the outer-most + // record. This is a C-only feature. + if (!getLangOpts().CPlusPlus && Record && + !isa(Record->getParent())) { + auto Pred = [](const Decl *D) { + if (const auto *FD = dyn_cast_if_present(D)) + return FD->hasAttr(); + return false; + }; + if (const FieldDecl *FD = Record->findFieldIf(Pred)) + CheckCountedByAttr(S, FD); + } } /// Determine whether the given integral value is representable within diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index ed0b4d29b0563..feb02cad9080e 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8361,6 +8361,92 @@ static void handleZeroCallUsedRegsAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->addAttr(ZeroCallUsedRegsAttr::Create(S.Context, Kind, AL)); } +static void handleCountedByAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (!AL.isArgIdent(0)) { + S.Diag(AL.getLoc(), diag::err_attribute_argument_type) + << AL << AANT_ArgumentIdentifier; + return; + } + + IdentifierLoc *IL = AL.getArgAsIdent(0); + CountedByAttr *CBA = + ::new (S.Context) CountedByAttr(S.Context, AL, IL->Ident); + CBA->setCountedByFieldLoc(IL->Loc); + D->addAttr(CBA); +} + +bool Sema::CheckCountedByAttr(Scope *S, const FieldDecl *FD) { + 
const auto *CBA = FD->getAttr(); + const IdentifierInfo *FieldName = CBA->getCountedByField(); + DeclarationNameInfo NameInfo(FieldName, + CBA->getCountedByFieldLoc().getBegin()); + + LookupResult MemResult(*this, NameInfo, Sema::LookupMemberName); + LookupName(MemResult, S); + + if (MemResult.empty()) { + // The "counted_by" field needs to exist within the struct. + LookupResult OrdResult(*this, NameInfo, Sema::LookupOrdinaryName); + LookupName(OrdResult, S); + + if (!OrdResult.empty()) { + SourceRange SR = FD->getLocation(); + Diag(SR.getBegin(), diag::err_counted_by_must_be_in_structure) + << FieldName << SR; + + if (auto *ND = OrdResult.getAsSingle()) { + SR = ND->getLocation(); + Diag(SR.getBegin(), diag::note_flexible_array_counted_by_attr_field) + << ND << SR; + } + return true; + } + + CXXScopeSpec SS; + DeclFilterCCC Filter(FieldName); + return DiagnoseEmptyLookup(S, SS, MemResult, Filter, nullptr, std::nullopt, + const_cast(FD->getDeclContext())); + } + + LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = + Context.getLangOpts().getStrictFlexArraysLevel(); + + if (!Decl::isFlexibleArrayMemberLike(Context, FD, FD->getType(), + StrictFlexArraysLevel, true)) { + // The "counted_by" attribute must be on a flexible array member. + SourceRange SR = FD->getLocation(); + Diag(SR.getBegin(), diag::err_counted_by_attr_not_on_flexible_array_member) + << SR; + return true; + } + + if (const FieldDecl *Field = MemResult.getAsSingle()) { + if (Field->hasAttr()) { + // The "counted_by" field can't point to the flexible array member. + SourceRange SR = CBA->getCountedByFieldLoc(); + Diag(SR.getBegin(), diag::err_counted_by_attr_refers_to_flexible_array) + << CBA->getCountedByField() << SR; + return true; + } + + if (!Field->getType()->isIntegerType() || + Field->getType()->isBooleanType()) { + // The "counted_by" field must have an integer type. 
+ SourceRange SR = CBA->getCountedByFieldLoc(); + Diag(SR.getBegin(), + diag::err_flexible_array_counted_by_attr_field_not_integer) + << CBA->getCountedByField() << SR; + + SR = Field->getLocation(); + Diag(SR.getBegin(), diag::note_flexible_array_counted_by_attr_field) + << Field << SR; + return true; + } + } + + return false; +} + static void handleFunctionReturnThunksAttr(Sema &S, Decl *D, const ParsedAttr &AL) { StringRef KindStr; @@ -9314,6 +9400,10 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, handleAvailableOnlyInDefaultEvalMethod(S, D, AL); break; + case ParsedAttr::AT_CountedBy: + handleCountedByAttr(S, D, AL); + break; + // Microsoft attributes: case ParsedAttr::AT_LayoutVersion: handleLayoutVersion(S, D, AL); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index cf45fc388083c..d78f923b2cb2c 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -2458,7 +2458,8 @@ bool Sema::DiagnoseDependentMemberLookup(const LookupResult &R) { bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, CorrectionCandidateCallback &CCC, TemplateArgumentListInfo *ExplicitTemplateArgs, - ArrayRef Args, TypoExpr **Out) { + ArrayRef Args, DeclContext *LookupCtx, + TypoExpr **Out) { DeclarationName Name = R.getLookupName(); unsigned diagnostic = diag::err_undeclared_var_use; @@ -2474,7 +2475,8 @@ bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, // unqualified lookup. This is useful when (for example) the // original lookup would not have found something because it was a // dependent name. - DeclContext *DC = SS.isEmpty() ? CurContext : nullptr; + DeclContext *DC = + LookupCtx ? LookupCtx : (SS.isEmpty() ? 
CurContext : nullptr); while (DC) { if (isa(DC)) { LookupQualifiedName(R, DC); @@ -2517,12 +2519,12 @@ bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, emitEmptyLookupTypoDiagnostic(TC, *this, SS, Name, TypoLoc, Args, diagnostic, diagnostic_suggest); }, - nullptr, CTK_ErrorRecovery); + nullptr, CTK_ErrorRecovery, LookupCtx); if (*Out) return true; - } else if (S && - (Corrected = CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), - S, &SS, CCC, CTK_ErrorRecovery))) { + } else if (S && (Corrected = + CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S, + &SS, CCC, CTK_ErrorRecovery, LookupCtx))) { std::string CorrectedStr(Corrected.getAsString(getLangOpts())); bool DroppedSpecifier = Corrected.WillReplaceSpecifier() && Name.getAsString() == CorrectedStr; @@ -2812,7 +2814,7 @@ Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS, // a template name, but we happen to have always already looked up the name // before we get here if it must be a template name. if (DiagnoseEmptyLookup(S, SS, R, CCC ? 
*CCC : DefaultValidator, nullptr, - std::nullopt, &TE)) { + std::nullopt, nullptr, &TE)) { if (TE && KeywordReplacement) { auto &State = getTypoExprState(TE); auto BestTC = State.Consumer->getNextCorrection(); diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c new file mode 100644 index 0000000000000..a7eb0da6dd282 --- /dev/null +++ b/clang/test/CodeGen/attr-counted-by.c @@ -0,0 +1,227 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -DCOUNTED_BY -O2 -Wall -fsanitize=array-bounds,object-size,local-bounds -fstrict-flex-arrays=3 -emit-llvm -o - %s | FileCheck --check-prefix=SANITIZE-WITH-ATTR %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -DCOUNTED_BY -O2 -Wall -fstrict-flex-arrays=3 -emit-llvm -o - %s | FileCheck --check-prefix=NO-SANITIZE-WITH-ATTR %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wall -fsanitize=array-bounds,object-size,local-bounds -fstrict-flex-arrays=3 -emit-llvm -o - %s | FileCheck --check-prefix=SANITIZE-WITHOUT-ATTR %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wall -fstrict-flex-arrays=3 -emit-llvm -o - %s | FileCheck --check-prefix=NO-SANITIZE-WITHOUT-ATTR %s + +#if !__has_attribute(counted_by) +#error "has attribute broken" +#endif + +#ifdef COUNTED_BY +#define __counted_by(member) __attribute__((__counted_by__(member))) +#else +#define __counted_by(member) +#endif + +typedef long unsigned int size_t; + +struct annotated { + unsigned long flags; + int count; + int array[] __counted_by(count); +}; + +// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1( +// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 1 +// 
SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[COUNT]], align 8, !tbaa [[TBAA2:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64, !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]], !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP3]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF7:![0-9]+]], !nosanitize !6 +// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = zext i32 [[INDEX]] to i64, !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[TMP4]]) #[[ATTR2:[0-9]+]], !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize !6 +// SANITIZE-WITH-ATTR: cont7: +// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED]], ptr [[P]], i64 0, i32 2, i64 [[TMP1]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITH-ATTR-NEXT: ret void +// +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1( +// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef writeonly [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 +// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 2, i64 [[IDXPROM]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: ret void +// +// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1( +// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// 
SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 +// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 2, i64 [[IDXPROM]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: ret void +// +// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1( +// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr nocapture noundef writeonly [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 2, i64 [[IDXPROM]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void +// +void test1(struct annotated *p, int index, int val) { + p->array[index] = val; +} + +// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2( +// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 1 +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[COUNT]], align 8, !tbaa [[TBAA2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], [[INDEX]], !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT12:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF7]], !nosanitize !6 +// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: tail call void 
@__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR2]], !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize !6 +// SANITIZE-WITH-ATTR: cont12: +// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED]], ptr [[P]], i64 0, i32 2, i64 [[INDEX]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = shl i32 [[TMP0]], 2 +// SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITH-ATTR-NEXT: ret void +// +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2( +// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 1 +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[COUNT]], align 8, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 2 +// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED]], ptr [[P]], i64 0, i32 2, i64 [[INDEX]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: ret void +// +// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2( +// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 2, i64 [[INDEX]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: ret void +// +// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2( +// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { 
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 2, i64 [[INDEX]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void +// +void test2(struct annotated *p, size_t index) { + p->array[index] = __builtin_dynamic_object_size(p->array, 1); +} + +// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3( +// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 1 +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[COUNT]], align 8, !tbaa [[TBAA2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], [[INDEX]], !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT12:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF7]], !nosanitize !6 +// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[INDEX]]) #[[ATTR2]], !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize !6 +// SANITIZE-WITH-ATTR: cont12: +// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED]], ptr [[P]], i64 0, i32 2, i64 [[INDEX]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = shl i32 [[TMP0]], 2 +// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = add i32 [[TMP3]], 16 +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITH-ATTR-NEXT: ret void +// +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3( +// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef [[P:%.*]], i64 noundef 
[[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 1 +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[COUNT]], align 8, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 2 +// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = add i32 [[TMP1]], 16 +// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED]], ptr [[P]], i64 0, i32 2, i64 [[INDEX]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: ret void +// +// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3( +// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 2, i64 [[INDEX]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: ret void +// +// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3( +// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED:%.*]], ptr [[P]], i64 0, i32 2, i64 [[INDEX]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void +// +void test3(struct annotated *p, size_t index) { + // This test differs from 'test2' by checking bdos on the whole array and not + // just the FAM. 
+ p->array[index] = __builtin_dynamic_object_size(p, 1); +} + +struct annotated_with_anon_struct { + unsigned long flags; + struct { + unsigned char count; + int array[] __counted_by(count); + }; +}; + +// SANITIZE-WITH-ATTR-LABEL: define dso_local void @test4( +// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED_WITH_ANON_STRUCT:%.*]], ptr [[P]], i64 0, i32 1 +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 8, !tbaa [[TBAA8:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = sext i32 [[INDEX]] to i64, !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64, !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP2]], [[TMP3]], !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label [[CONT18:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF7]], !nosanitize !6 +// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = zext i32 [[INDEX]] to i64, !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[TMP5]]) #[[ATTR2]], !nosanitize !6 +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize !6 +// SANITIZE-WITH-ATTR: cont18: +// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 +// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[TMP2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP6:%.*]] = shl i8 [[TMP1]], 2 +// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = zext i8 [[TMP6]] to i32 +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITH-ATTR-NEXT: ret void +// +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test4( +// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture 
noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANNOTATED_WITH_ANON_STRUCT:%.*]], ptr [[P]], i64 0, i32 1 +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 8, !tbaa [[TBAA6:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], 2 +// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = zext i8 [[TMP2]] to i32 +// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 +// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 +// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: ret void +// +// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test4( +// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 +// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 +// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: ret void +// +// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test4( +// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 +// 
NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void +// +void test4(struct annotated_with_anon_struct *p, int index) { + p->array[index] = __builtin_dynamic_object_size(p->array, 1); +} diff --git a/clang/test/CodeGen/bounds-checking.c b/clang/test/CodeGen/bounds-checking.c index 636d4f289e247..8100e30d0650a 100644 --- a/clang/test/CodeGen/bounds-checking.c +++ b/clang/test/CodeGen/bounds-checking.c @@ -69,7 +69,6 @@ int f7(union U *u, int i) { return u->c[i]; } - char B[10]; char B2[10]; // CHECK-LABEL: @f8 @@ -82,3 +81,12 @@ void f8(int i, int k) { // NOOPTARRAY: call void @llvm.ubsantrap(i8 4) B2[k] = '\0'; } + +// See commit 9a954c6 that caused a SEGFAULT in this code. +struct S { + __builtin_va_list ap; +} *s; +// CHECK-LABEL: @f9 +struct S *f9(int i) { + return &s[i]; +} diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index eaf6d34421bbe..f48126775c868 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -56,6 +56,7 @@ // CHECK-NEXT: ConsumableAutoCast (SubjectMatchRule_record) // CHECK-NEXT: ConsumableSetOnRead (SubjectMatchRule_record) // CHECK-NEXT: Convergent (SubjectMatchRule_function) +// CHECK-NEXT: CountedBy (SubjectMatchRule_field) // CHECK-NEXT: DLLExport (SubjectMatchRule_function, SubjectMatchRule_variable, SubjectMatchRule_record, SubjectMatchRule_objc_interface) // CHECK-NEXT: DLLImport (SubjectMatchRule_function, SubjectMatchRule_variable, SubjectMatchRule_record, SubjectMatchRule_objc_interface) // CHECK-NEXT: Destructor (SubjectMatchRule_function) diff --git a/clang/test/Sema/attr-counted-by.c b/clang/test/Sema/attr-counted-by.c new file mode 100644 
index 0000000000000..654ddb7f1b42b --- /dev/null +++ b/clang/test/Sema/attr-counted-by.c @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -fstrict-flex-arrays=3 -fsyntax-only -verify %s + +#define __counted_by(f) __attribute__((counted_by(f))) + +struct bar; + +struct not_found { + int count; + struct bar *fam[] __counted_by(bork); // expected-error {{use of undeclared identifier 'bork'}} +}; + +struct not_found_suggest { + int bork; // expected-note {{'bork' declared here}} + struct bar *fam[] __counted_by(blork); // expected-error {{use of undeclared identifier 'blork'; did you mean 'bork'?}} +}; + +int global; // expected-note {{'global' declared here}} + +struct found_outside_of_struct { + int bork; + struct bar *fam[] __counted_by(global); // expected-error {{field 'global' in 'counted_by' not inside structure}} +}; + +struct self_referrential { + int bork; + struct bar *self[] __counted_by(self); // expected-error {{'counted_by' cannot refer to the flexible array 'self'}} +}; + +struct non_int_count { + double dbl_count; // expected-note {{field 'dbl_count' declared here}} + struct bar *fam[] __counted_by(dbl_count); // expected-error {{field 'dbl_count' in 'counted_by' must be a non-boolean integer type}} +}; + +struct array_of_ints_count { + int integers[2]; // expected-note {{field 'integers' declared here}} + struct bar *fam[] __counted_by(integers); // expected-error {{field 'integers' in 'counted_by' must be a non-boolean integer type}} +}; + +struct not_a_fam { + int count; + struct bar *non_fam __counted_by(count); // expected-error {{'counted_by' only applies to flexible array members}} +}; + +struct annotated_with_anon_struct { + unsigned long flags; + struct { + unsigned char count; // expected-note {{'count' declared here}} + int array[] __counted_by(crount); // expected-error {{use of undeclared identifier 'crount'; did you mean 'count'?}} + }; +}; From a502dddfd0da8ccefd2cee15599b49f6eaf74efa Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 14 Oct 2023 
12:34:54 +0100 Subject: [PATCH 132/720] [AArch64] Additional GISel test for FMA. NFC --- llvm/test/CodeGen/AArch64/fmla.ll | 2484 +++++++++++++++++++++++++++++ 1 file changed, 2484 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/fmla.ll diff --git a/llvm/test/CodeGen/AArch64/fmla.ll b/llvm/test/CodeGen/AArch64/fmla.ll new file mode 100644 index 0000000000000..a1782f8e9087c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fmla.ll @@ -0,0 +1,2484 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 +; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 + +; CHECK-GI: warning: Instruction selection used fallback path for fma_v3f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v4f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v3f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v8f32 +; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fma_v7f16 +; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fma_v16f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmuladd_v3f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmuladd_v4f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmuladd_v3f32 +; CHECK-GI-NEXT: warning: Instruction 
selection used fallback path for fmuladd_v8f32 +; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fmuladd_v7f16 +; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fmuladd_v16f16 + +define double @fma_f64(double %a, double %b, double %c) { +; CHECK-LABEL: fma_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmadd d0, d0, d1, d2 +; CHECK-NEXT: ret +entry: + %d = call double @llvm.fma.f64(double %a, double %b, double %c) + ret double %d +} + +define float @fma_f32(float %a, float %b, float %c) { +; CHECK-LABEL: fma_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmadd s0, s0, s1, s2 +; CHECK-NEXT: ret +entry: + %d = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %d +} + +define half @fma_f16(half %a, half %b, half %c) { +; CHECK-SD-NOFP16-LABEL: fma_f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fma_f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fma_f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fmadd s0, s0, s1, s2 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fma_f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-GI-FP16-NEXT: ret +entry: + %d = call half @llvm.fma.f16(half %a, half %b, half %c) + ret half %d +} + +define <2 x double> @fma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: fma_v2f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d +; CHECK-NEXT: mov v0.16b, v2.16b +; 
CHECK-NEXT: ret +entry: + %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) + ret <2 x double> %d +} + +define <3 x double> @fma_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) { +; CHECK-LABEL: fma_v3f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d6 killed $d6 def $q6 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d7 killed $d7 def $q7 +; CHECK-NEXT: // kill: def $d4 killed $d4 def $q4 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d5 killed $d5 def $q5 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v3.d[1], v4.d[0] +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: mov v6.d[1], v7.d[0] +; CHECK-NEXT: fmla v6.2d, v3.2d, v0.2d +; CHECK-NEXT: ldr d3, [sp] +; CHECK-NEXT: fmla v3.2d, v5.2d, v2.2d +; CHECK-NEXT: fmov d0, d6 +; CHECK-NEXT: ext v1.16b, v6.16b, v6.16b, #8 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: fmov d2, d3 +; CHECK-NEXT: ret +entry: + %d = call <3 x double> @llvm.fma.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) + ret <3 x double> %d +} + +define <4 x double> @fma_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) { +; CHECK-LABEL: fma_v4f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v4.2d, v2.2d, v0.2d +; CHECK-NEXT: fmla v5.2d, v3.2d, v1.2d +; CHECK-NEXT: mov v0.16b, v4.16b +; CHECK-NEXT: mov v1.16b, v5.16b +; CHECK-NEXT: ret +entry: + %d = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) + ret <4 x double> %d +} + +define <2 x float> @fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; CHECK-LABEL: fma_v2f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v2.2s, v1.2s, v0.2s +; CHECK-NEXT: fmov d0, d2 +; CHECK-NEXT: ret +entry: + %d = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) + ret <2 x float> %d +} + +define 
<3 x float> @fma_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) { +; CHECK-LABEL: fma_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret +entry: + %d = call <3 x float> @llvm.fma.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) + ret <3 x float> %d +} + +define <4 x float> @fma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: fma_v4f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret +entry: + %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) + ret <4 x float> %d +} + +define <8 x float> @fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) { +; CHECK-LABEL: fma_v8f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v4.4s, v2.4s, v0.4s +; CHECK-NEXT: fmla v5.4s, v3.4s, v1.4s +; CHECK-NEXT: mov v0.16b, v4.16b +; CHECK-NEXT: mov v1.16b, v5.16b +; CHECK-NEXT: ret +entry: + %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) + ret <8 x float> %d +} + +define <7 x half> @fma_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fma_v7f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h0 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2] +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2] +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h19 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3] +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[3] 
+; CHECK-SD-NOFP16-NEXT: fmadd s4, s5, s4, s3 +; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt h3, s6 +; CHECK-SD-NOFP16-NEXT: fmadd s6, s17, s16, s7 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s7, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h19 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h4, s4 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h6, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v4.h[0] +; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[5] +; CHECK-SD-NOFP16-NEXT: fmadd s5, s16, s7, s5 +; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5] +; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v6.h[0] +; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt h5, s5 +; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6] +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6] +; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7] +; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7] +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7] +; CHECK-SD-NOFP16-NEXT: fmadd s4, s16, s7, s4 +; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v5.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt s5, h6 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h19 +; CHECK-SD-NOFP16-NEXT: fcvt h16, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s4 +; CHECK-SD-NOFP16-NEXT: fmadd s5, s7, s6, s5 +; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v16.h[0] +; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2 +; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h4, s5 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: mov 
v3.h[6], v4.h[0] +; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0] +; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fma_v7f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h +; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fma_v7f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov h5, v2.h[1] +; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s17, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s18, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s19, h2 +; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[4] +; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[4] +; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h16 +; CHECK-GI-NOFP16-NEXT: fmadd s17, s17, s18, s19 +; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[3] +; CHECK-GI-NOFP16-NEXT: mov h19, v2.h[3] +; CHECK-GI-NOFP16-NEXT: fmadd s4, s3, s4, s5 +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fmadd s6, s6, s7, s16 +; CHECK-GI-NOFP16-NEXT: fcvt h3, s17 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h18 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h19 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h20 +; CHECK-GI-NOFP16-NEXT: fcvt s18, h21 +; CHECK-GI-NOFP16-NEXT: fcvt s19, h22 +; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[5] +; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[5] +; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6] +; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[6] +; 
CHECK-GI-NOFP16-NEXT: fmadd s5, s5, s7, s16 +; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fmadd s4, s17, s18, s19 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h20 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h21 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h22 +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 +; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 +; CHECK-GI-NOFP16-NEXT: fmadd s6, s7, s16, s17 +; CHECK-GI-NOFP16-NEXT: fmadd s0, s0, s1, s2 +; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt h5, s6 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v4.h[0] +; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fma_v7f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %d = call <7 x half> @llvm.fma.v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) + ret <7 x half> %d +} + +define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fma_v4f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NOFP16-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h0 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2] +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2] +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h2, 
v2.h[3] +; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6 +; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s7, h19 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fmadd s3, s5, s4, s3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h18 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s6 +; CHECK-SD-NOFP16-NEXT: fmadd s4, s7, s5, s4 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h16 +; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v3.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h3, s4 +; CHECK-SD-NOFP16-NEXT: fmadd s1, s5, s1, s2 +; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v3.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h1, s1 +; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0] +; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fma_v4f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h +; CHECK-SD-FP16-NEXT: fmov d0, d2 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fma_v4f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NOFP16-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov h5, v2.h[1] +; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s17, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s18, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s19, h2 +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[3] +; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; 
CHECK-GI-NOFP16-NEXT: fcvt s6, h6 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h16 +; CHECK-GI-NOFP16-NEXT: fmadd s17, s17, s18, s19 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fmadd s3, s3, s4, s5 +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fmadd s5, s6, s7, s16 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s17 +; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 +; CHECK-GI-NOFP16-NEXT: fmadd s1, s4, s1, s2 +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0] +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fma_v4f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h +; CHECK-GI-FP16-NEXT: fmov d0, d2 +; CHECK-GI-FP16-NEXT: ret +entry: + %d = call <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) + ret <4 x half> %d +} + +define <8 x half> @fma_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fma_v8f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h0 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2] +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2] +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h19 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3] +; CHECK-SD-NOFP16-NEXT: mov h19, 
v0.h[3] +; CHECK-SD-NOFP16-NEXT: fmadd s4, s5, s4, s3 +; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt h3, s6 +; CHECK-SD-NOFP16-NEXT: fmadd s6, s17, s16, s7 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s7, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h19 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h4, s4 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h6, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v4.h[0] +; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[5] +; CHECK-SD-NOFP16-NEXT: fmadd s5, s16, s7, s5 +; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5] +; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v6.h[0] +; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt h5, s5 +; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6] +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6] +; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7] +; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7] +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7] +; CHECK-SD-NOFP16-NEXT: fmadd s4, s16, s7, s4 +; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v5.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt s5, h6 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h19 +; CHECK-SD-NOFP16-NEXT: fcvt h16, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s4 +; CHECK-SD-NOFP16-NEXT: fmadd s5, s7, s6, s5 +; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v16.h[0] +; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2 +; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h4, s5 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: 
mov v3.h[6], v4.h[0] +; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0] +; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fma_v8f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h +; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fma_v8f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov h5, v2.h[1] +; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s17, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s18, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s19, h2 +; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[3] +; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h16 +; CHECK-GI-NOFP16-NEXT: fmadd s17, s17, s18, s19 +; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4] +; CHECK-GI-NOFP16-NEXT: fcvt s19, h22 +; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[5] +; CHECK-GI-NOFP16-NEXT: fmadd s4, s3, s4, s5 +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] +; CHECK-GI-NOFP16-NEXT: fmadd s6, s6, s7, s16 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h20 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h21 +; CHECK-GI-NOFP16-NEXT: mov h20, v2.h[4] +; CHECK-GI-NOFP16-NEXT: fcvt h3, s17 +; CHECK-GI-NOFP16-NEXT: mov h17, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[5] +; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: fmadd s7, s7, s16, s19 +; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h18 +; CHECK-GI-NOFP16-NEXT: fcvt s18, h20 +; CHECK-GI-NOFP16-NEXT: fcvt s19, h22 +; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[6] +; 
CHECK-GI-NOFP16-NEXT: mov h22, v2.h[6] +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] +; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[7] +; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt s4, h17 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h21 +; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[6] +; CHECK-GI-NOFP16-NEXT: fcvt h7, s7 +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] +; CHECK-GI-NOFP16-NEXT: fmadd s5, s5, s16, s18 +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fmadd s4, s4, s17, s19 +; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt s6, h20 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h21 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h22 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 +; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v7.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 +; CHECK-GI-NOFP16-NEXT: fmadd s6, s6, s16, s17 +; CHECK-GI-NOFP16-NEXT: fmadd s0, s0, s1, s2 +; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v5.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt h5, s6 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v4.h[0] +; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fma_v8f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) + ret <8 x half> %d +} + +define <16 x half> @fma_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fma_v16f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: mov h6, v4.h[1] +; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s17, h4 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h0 +; 
CHECK-SD-NOFP16-NEXT: mov h20, v4.h[2] +; CHECK-SD-NOFP16-NEXT: mov h21, v2.h[2] +; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h23, v4.h[3] +; CHECK-SD-NOFP16-NEXT: mov h24, v2.h[3] +; CHECK-SD-NOFP16-NEXT: mov h25, v0.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17 +; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s27, h5 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h23 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h24 +; CHECK-SD-NOFP16-NEXT: fcvt s23, h25 +; CHECK-SD-NOFP16-NEXT: fmadd s7, s16, s7, s6 +; CHECK-SD-NOFP16-NEXT: mov h24, v5.h[1] +; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt h6, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s28, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s29, h1 +; CHECK-SD-NOFP16-NEXT: fmadd s19, s20, s19, s18 +; CHECK-SD-NOFP16-NEXT: fcvt s26, h26 +; CHECK-SD-NOFP16-NEXT: mov h16, v4.h[4] +; CHECK-SD-NOFP16-NEXT: fmadd s21, s23, s22, s21 +; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[2] +; CHECK-SD-NOFP16-NEXT: mov h23, v1.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt h20, s7 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h24 +; CHECK-SD-NOFP16-NEXT: fcvt s25, h25 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4] +; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[4] +; CHECK-SD-NOFP16-NEXT: mov h7, v4.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt h19, s19 +; CHECK-SD-NOFP16-NEXT: mov h30, v2.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt h21, s21 +; CHECK-SD-NOFP16-NEXT: mov h31, v1.h[4] +; CHECK-SD-NOFP16-NEXT: fmadd s24, s26, s25, s24 +; CHECK-SD-NOFP16-NEXT: fmadd s25, s29, s28, s27 +; CHECK-SD-NOFP16-NEXT: mov v6.h[1], v20.h[0] +; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[2] +; CHECK-SD-NOFP16-NEXT: mov h26, v5.h[3] +; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[3] +; CHECK-SD-NOFP16-NEXT: mov h28, v1.h[3] +; 
CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s29, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s30, h30 +; CHECK-SD-NOFP16-NEXT: mov v6.h[2], v19.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h24, s24 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s25 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h23 +; CHECK-SD-NOFP16-NEXT: fmadd s16, s18, s17, s16 +; CHECK-SD-NOFP16-NEXT: mov h23, v0.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s25, h26 +; CHECK-SD-NOFP16-NEXT: fcvt s26, h27 +; CHECK-SD-NOFP16-NEXT: fcvt s27, h28 +; CHECK-SD-NOFP16-NEXT: mov h18, v4.h[6] +; CHECK-SD-NOFP16-NEXT: mov v6.h[3], v21.h[0] +; CHECK-SD-NOFP16-NEXT: mov v7.h[1], v24.h[0] +; CHECK-SD-NOFP16-NEXT: mov h24, v5.h[5] +; CHECK-SD-NOFP16-NEXT: fmadd s19, s22, s20, s19 +; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[4] +; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s23, h23 +; CHECK-SD-NOFP16-NEXT: mov h28, v0.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt h16, s16 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: mov h4, v4.h[7] +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h31 +; CHECK-SD-NOFP16-NEXT: fmadd s17, s23, s30, s29 +; CHECK-SD-NOFP16-NEXT: fmadd s23, s27, s26, s25 +; CHECK-SD-NOFP16-NEXT: fcvt h19, s19 +; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[5] +; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[5] +; CHECK-SD-NOFP16-NEXT: mov h27, v2.h[6] +; CHECK-SD-NOFP16-NEXT: mov h29, v1.h[6] +; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7] +; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7] +; CHECK-SD-NOFP16-NEXT: fmadd s20, s22, s21, s20 +; CHECK-SD-NOFP16-NEXT: mov h21, v5.h[6] +; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[6] +; CHECK-SD-NOFP16-NEXT: mov v7.h[2], v19.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h19, s23 +; CHECK-SD-NOFP16-NEXT: fcvt s23, h24 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h25 +; CHECK-SD-NOFP16-NEXT: fcvt s25, h26 
+; CHECK-SD-NOFP16-NEXT: fcvt s26, h27 +; CHECK-SD-NOFP16-NEXT: fcvt s27, h28 +; CHECK-SD-NOFP16-NEXT: fcvt s28, h29 +; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt s21, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h22 +; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[7] +; CHECK-SD-NOFP16-NEXT: mov v7.h[3], v19.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h19, s20 +; CHECK-SD-NOFP16-NEXT: mov v6.h[4], v16.h[0] +; CHECK-SD-NOFP16-NEXT: fmadd s20, s25, s24, s23 +; CHECK-SD-NOFP16-NEXT: fcvt h16, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmadd s18, s27, s26, s18 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fmadd s21, s28, s22, s21 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: mov v7.h[4], v19.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt h17, s20 +; CHECK-SD-NOFP16-NEXT: mov v6.h[5], v16.h[0] +; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s2, s4 +; CHECK-SD-NOFP16-NEXT: fcvt h2, s18 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s21 +; CHECK-SD-NOFP16-NEXT: fmadd s1, s1, s3, s5 +; CHECK-SD-NOFP16-NEXT: mov v7.h[5], v17.h[0] +; CHECK-SD-NOFP16-NEXT: mov v6.h[6], v2.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: fcvt h1, s1 +; CHECK-SD-NOFP16-NEXT: mov v7.h[6], v4.h[0] +; CHECK-SD-NOFP16-NEXT: mov v6.h[7], v0.h[0] +; CHECK-SD-NOFP16-NEXT: mov v7.h[7], v1.h[0] +; CHECK-SD-NOFP16-NEXT: mov v0.16b, v6.16b +; CHECK-SD-NOFP16-NEXT: mov v1.16b, v7.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fma_v16f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h +; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h +; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b +; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fma_v16f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NOFP16-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NOFP16-NEXT: .cfi_offset b8, -16 +; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[1] +; CHECK-GI-NOFP16-NEXT: fcvt s16, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h2 +; CHECK-GI-NOFP16-NEXT: fcvt s18, h4 +; CHECK-GI-NOFP16-NEXT: mov h19, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h20, v2.h[1] +; CHECK-GI-NOFP16-NEXT: mov h21, v4.h[1] +; CHECK-GI-NOFP16-NEXT: mov h22, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov h23, v2.h[2] +; CHECK-GI-NOFP16-NEXT: mov h24, v4.h[2] +; CHECK-GI-NOFP16-NEXT: mov h26, v2.h[3] +; CHECK-GI-NOFP16-NEXT: mov h27, v4.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 +; CHECK-GI-NOFP16-NEXT: mov h25, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h28, v1.h[1] +; CHECK-GI-NOFP16-NEXT: fmadd s16, s16, s17, s18 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h20 +; CHECK-GI-NOFP16-NEXT: fcvt s19, h19 +; CHECK-GI-NOFP16-NEXT: fcvt s18, h21 +; CHECK-GI-NOFP16-NEXT: fcvt s20, h23 +; CHECK-GI-NOFP16-NEXT: fcvt s22, h22 +; CHECK-GI-NOFP16-NEXT: fcvt s21, h24 +; CHECK-GI-NOFP16-NEXT: fcvt s23, h26 +; CHECK-GI-NOFP16-NEXT: fcvt s24, h27 +; CHECK-GI-NOFP16-NEXT: fcvt s26, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s27, h3 +; CHECK-GI-NOFP16-NEXT: fcvt s29, h5 +; CHECK-GI-NOFP16-NEXT: mov h31, v2.h[4] +; CHECK-GI-NOFP16-NEXT: mov h8, v3.h[1] +; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] +; CHECK-GI-NOFP16-NEXT: fmadd s17, s6, s17, s18 +; CHECK-GI-NOFP16-NEXT: fcvt h6, s16 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h28 +; CHECK-GI-NOFP16-NEXT: fmadd s19, s19, s20, s21 +; CHECK-GI-NOFP16-NEXT: fmadd s18, s22, s23, s24 +; CHECK-GI-NOFP16-NEXT: mov h20, v5.h[1] +; CHECK-GI-NOFP16-NEXT: fmadd s24, s26, s27, s29 +; CHECK-GI-NOFP16-NEXT: mov h22, v4.h[4] +; CHECK-GI-NOFP16-NEXT: mov h21, v3.h[2] +; CHECK-GI-NOFP16-NEXT: mov h26, v5.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s25, h25 +; CHECK-GI-NOFP16-NEXT: fcvt s28, h31 +; CHECK-GI-NOFP16-NEXT: fcvt h29, s17 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h8 +; CHECK-GI-NOFP16-NEXT: mov h30, v1.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt 
s20, h20 +; CHECK-GI-NOFP16-NEXT: mov h23, v3.h[3] +; CHECK-GI-NOFP16-NEXT: mov h27, v5.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s22, h22 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 +; CHECK-GI-NOFP16-NEXT: fcvt s21, h21 +; CHECK-GI-NOFP16-NEXT: fcvt s26, h26 +; CHECK-GI-NOFP16-NEXT: mov h31, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h8, v1.h[4] +; CHECK-GI-NOFP16-NEXT: fcvt s30, h30 +; CHECK-GI-NOFP16-NEXT: fcvt h19, s19 +; CHECK-GI-NOFP16-NEXT: mov v6.h[1], v29.h[0] +; CHECK-GI-NOFP16-NEXT: fmadd s20, s16, s17, s20 +; CHECK-GI-NOFP16-NEXT: fcvt s23, h23 +; CHECK-GI-NOFP16-NEXT: fcvt s27, h27 +; CHECK-GI-NOFP16-NEXT: fmadd s16, s25, s28, s22 +; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[5] +; CHECK-GI-NOFP16-NEXT: mov h25, v4.h[5] +; CHECK-GI-NOFP16-NEXT: fmadd s21, s7, s21, s26 +; CHECK-GI-NOFP16-NEXT: mov h26, v3.h[4] +; CHECK-GI-NOFP16-NEXT: mov h28, v5.h[4] +; CHECK-GI-NOFP16-NEXT: fcvt h7, s24 +; CHECK-GI-NOFP16-NEXT: fcvt s24, h31 +; CHECK-GI-NOFP16-NEXT: mov h29, v1.h[5] +; CHECK-GI-NOFP16-NEXT: fmadd s17, s30, s23, s27 +; CHECK-GI-NOFP16-NEXT: fcvt h20, s20 +; CHECK-GI-NOFP16-NEXT: fcvt s27, h8 +; CHECK-GI-NOFP16-NEXT: fcvt s22, h22 +; CHECK-GI-NOFP16-NEXT: fcvt s25, h25 +; CHECK-GI-NOFP16-NEXT: fcvt h18, s18 +; CHECK-GI-NOFP16-NEXT: fcvt s26, h26 +; CHECK-GI-NOFP16-NEXT: fcvt s28, h28 +; CHECK-GI-NOFP16-NEXT: mov v6.h[2], v19.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt h21, s21 +; CHECK-GI-NOFP16-NEXT: mov h23, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[6] +; CHECK-GI-NOFP16-NEXT: mov v7.h[1], v20.h[0] +; CHECK-GI-NOFP16-NEXT: mov h20, v3.h[5] +; CHECK-GI-NOFP16-NEXT: fcvt h17, s17 +; CHECK-GI-NOFP16-NEXT: fmadd s22, s24, s22, s25 +; CHECK-GI-NOFP16-NEXT: mov h24, v5.h[5] +; CHECK-GI-NOFP16-NEXT: mov h25, v2.h[6] +; CHECK-GI-NOFP16-NEXT: fmadd s26, s27, s26, s28 +; CHECK-GI-NOFP16-NEXT: mov h27, v4.h[6] +; CHECK-GI-NOFP16-NEXT: mov h28, v3.h[6] +; CHECK-GI-NOFP16-NEXT: mov v6.h[3], v18.h[0] +; CHECK-GI-NOFP16-NEXT: mov h18, v5.h[6] +; CHECK-GI-NOFP16-NEXT: fcvt h16, s16 +; 
CHECK-GI-NOFP16-NEXT: mov v7.h[2], v21.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt s21, h29 +; CHECK-GI-NOFP16-NEXT: fcvt s20, h20 +; CHECK-GI-NOFP16-NEXT: fcvt s24, h24 +; CHECK-GI-NOFP16-NEXT: fcvt s23, h23 +; CHECK-GI-NOFP16-NEXT: fcvt s25, h25 +; CHECK-GI-NOFP16-NEXT: fcvt s27, h27 +; CHECK-GI-NOFP16-NEXT: fcvt s19, h19 +; CHECK-GI-NOFP16-NEXT: fcvt s28, h28 +; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] +; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[7] +; CHECK-GI-NOFP16-NEXT: mov v7.h[3], v17.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt h17, s26 +; CHECK-GI-NOFP16-NEXT: mov h4, v4.h[7] +; CHECK-GI-NOFP16-NEXT: fmadd s20, s21, s20, s24 +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] +; CHECK-GI-NOFP16-NEXT: mov h3, v3.h[7] +; CHECK-GI-NOFP16-NEXT: fmadd s21, s23, s25, s27 +; CHECK-GI-NOFP16-NEXT: mov h5, v5.h[7] +; CHECK-GI-NOFP16-NEXT: mov v6.h[4], v16.h[0] +; CHECK-GI-NOFP16-NEXT: fmadd s18, s19, s28, s18 +; CHECK-GI-NOFP16-NEXT: fcvt h16, s22 +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: mov v7.h[4], v17.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fcvt h17, s20 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: mov v6.h[5], v16.h[0] +; CHECK-GI-NOFP16-NEXT: fmadd s0, s0, s2, s4 +; CHECK-GI-NOFP16-NEXT: fcvt h2, s21 +; CHECK-GI-NOFP16-NEXT: mov v7.h[5], v17.h[0] +; CHECK-GI-NOFP16-NEXT: fmadd s1, s1, s3, s5 +; CHECK-GI-NOFP16-NEXT: fcvt h3, s18 +; CHECK-GI-NOFP16-NEXT: mov v6.h[6], v2.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: mov v7.h[6], v3.h[0] +; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 +; CHECK-GI-NOFP16-NEXT: mov v6.h[7], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v7.h[7], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.16b, v6.16b +; CHECK-GI-NOFP16-NEXT: mov v1.16b, v7.16b +; CHECK-GI-NOFP16-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NOFP16-NEXT: ret +; +; 
CHECK-GI-FP16-LABEL: fma_v16f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h +; CHECK-GI-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h +; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b +; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) + ret <16 x half> %d +} + +define double @fmuladd_f64(double %a, double %b, double %c) { +; CHECK-LABEL: fmuladd_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmadd d0, d0, d1, d2 +; CHECK-NEXT: ret +entry: + %d = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %d +} + +define float @fmuladd_f32(float %a, float %b, float %c) { +; CHECK-LABEL: fmuladd_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmadd s0, s0, s1, s2 +; CHECK-NEXT: ret +entry: + %d = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %d +} + +define half @fmuladd_f16(half %a, half %b, half %c) { +; CHECK-SD-NOFP16-LABEL: fmuladd_f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h2 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fmuladd_f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmuladd_f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h2 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: ret +; +; 
CHECK-GI-FP16-LABEL: fmuladd_f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-GI-FP16-NEXT: ret +entry: + %d = call half @llvm.fmuladd.f16(half %a, half %b, half %c) + ret half %d +} + +define <2 x double> @fmuladd_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: fmuladd_v2f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret +entry: + %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) + ret <2 x double> %d +} + +define <3 x double> @fmuladd_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) { +; CHECK-LABEL: fmuladd_v3f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d6 killed $d6 def $q6 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d7 killed $d7 def $q7 +; CHECK-NEXT: // kill: def $d4 killed $d4 def $q4 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d5 killed $d5 def $q5 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v3.d[1], v4.d[0] +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: mov v6.d[1], v7.d[0] +; CHECK-NEXT: fmla v6.2d, v3.2d, v0.2d +; CHECK-NEXT: ldr d3, [sp] +; CHECK-NEXT: fmla v3.2d, v5.2d, v2.2d +; CHECK-NEXT: fmov d0, d6 +; CHECK-NEXT: ext v1.16b, v6.16b, v6.16b, #8 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: fmov d2, d3 +; CHECK-NEXT: ret +entry: + %d = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) + ret <3 x double> %d +} + +define <4 x double> @fmuladd_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) { +; CHECK-LABEL: fmuladd_v4f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v4.2d, v2.2d, v0.2d +; CHECK-NEXT: fmla v5.2d, v3.2d, v1.2d +; CHECK-NEXT: mov v0.16b, v4.16b +; CHECK-NEXT: mov v1.16b, v5.16b +; CHECK-NEXT: 
ret +entry: + %d = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) + ret <4 x double> %d +} + +define <2 x float> @fmuladd_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; CHECK-LABEL: fmuladd_v2f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v2.2s, v1.2s, v0.2s +; CHECK-NEXT: fmov d0, d2 +; CHECK-NEXT: ret +entry: + %d = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) + ret <2 x float> %d +} + +define <3 x float> @fmuladd_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) { +; CHECK-LABEL: fmuladd_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret +entry: + %d = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) + ret <3 x float> %d +} + +define <4 x float> @fmuladd_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: fmuladd_v4f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret +entry: + %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) + ret <4 x float> %d +} + +define <8 x float> @fmuladd_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) { +; CHECK-LABEL: fmuladd_v8f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla v4.4s, v2.4s, v0.4s +; CHECK-NEXT: fmla v5.4s, v3.4s, v1.4s +; CHECK-NEXT: mov v0.16b, v4.16b +; CHECK-NEXT: mov v1.16b, v5.16b +; CHECK-NEXT: ret +entry: + %d = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) + ret <8 x float> %d +} + +define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fmuladd_v7f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s5, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h0 +; 
CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2] +; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[3] +; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h2 +; CHECK-SD-NOFP16-NEXT: mov h20, v2.h[2] +; CHECK-SD-NOFP16-NEXT: mov h21, v1.h[5] +; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5 +; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fmul s3, s4, s3 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s5 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h6 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h17 +; CHECK-SD-NOFP16-NEXT: fmul s7, s16, s7 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5 +; CHECK-SD-NOFP16-NEXT: mov h6, v0.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s7 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fadd s4, s4, s19 +; CHECK-SD-NOFP16-NEXT: mov h19, v2.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvt h5, s5 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fmul s16, s18, s16 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6] +; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fadd s17, s3, s17 +; CHECK-SD-NOFP16-NEXT: fmul s6, s6, s21 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s4 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fadd s4, s7, s20 +; CHECK-SD-NOFP16-NEXT: fcvt h16, s16 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; 
CHECK-SD-NOFP16-NEXT: fcvt s20, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s17 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h6, s6 +; CHECK-SD-NOFP16-NEXT: fadd s5, s5, s19 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s4 +; CHECK-SD-NOFP16-NEXT: fmul s18, s20, s18 +; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NOFP16-NEXT: mov h1, v2.h[6] +; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v7.h[0] +; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fadd s16, s16, s17 +; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v4.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h4, s5 +; CHECK-SD-NOFP16-NEXT: fcvt h5, s18 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fadd s6, s6, s7 +; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v4.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h4, s16 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2 +; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v4.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h4, s6 +; CHECK-SD-NOFP16-NEXT: fadd s1, s5, s1 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h1, s1 +; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v1.h[0] +; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0] +; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fmuladd_v7f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h +; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmuladd_v7f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h5, 
v1.h[4] +; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[5] +; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6] +; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v4.h[0] +; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v6.h[0] +; CHECK-GI-NOFP16-NEXT: fmul v4.4s, v7.4s, v16.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v1.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v4.4s +; CHECK-GI-NOFP16-NEXT: mov h1, v2.h[4] +; CHECK-GI-NOFP16-NEXT: mov h4, v2.h[5] +; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[6] +; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v5.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v6.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] +; CHECK-GI-NOFP16-NEXT: fmul v2.4s, v3.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[0] +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0] +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fmuladd_v7f16: +; 
CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %d = call <7 x half> @llvm.fmuladd.v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) + ret <7 x half> %d +} + +define <4 x half> @fmuladd_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fmuladd_v4f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h +; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fmuladd_v4f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h +; CHECK-SD-FP16-NEXT: fmov d0, d2 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmuladd_v4f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fmuladd_v4f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h +; CHECK-GI-FP16-NEXT: fmov d0, d2 +; CHECK-GI-FP16-NEXT: ret +entry: + %d = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) + ret <4 x half> %d +} + +define <8 x half> @fmuladd_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fmuladd_v8f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: mov 
h3, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s5, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h0 +; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2] +; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[3] +; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h2 +; CHECK-SD-NOFP16-NEXT: mov h20, v2.h[2] +; CHECK-SD-NOFP16-NEXT: mov h21, v1.h[5] +; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5 +; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fmul s3, s4, s3 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s5 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h6 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h17 +; CHECK-SD-NOFP16-NEXT: fmul s7, s16, s7 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5 +; CHECK-SD-NOFP16-NEXT: mov h6, v0.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s7 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fadd s4, s4, s19 +; CHECK-SD-NOFP16-NEXT: mov h19, v2.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvt h5, s5 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fmul s16, s18, s16 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6] +; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fadd s17, s3, s17 +; CHECK-SD-NOFP16-NEXT: fmul s6, s6, s21 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s4 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; 
CHECK-SD-NOFP16-NEXT: fadd s4, s7, s20 +; CHECK-SD-NOFP16-NEXT: fcvt h16, s16 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s17 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h6, s6 +; CHECK-SD-NOFP16-NEXT: fadd s5, s5, s19 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s4 +; CHECK-SD-NOFP16-NEXT: fmul s18, s20, s18 +; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NOFP16-NEXT: mov h1, v2.h[6] +; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v7.h[0] +; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fadd s16, s16, s17 +; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v4.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h4, s5 +; CHECK-SD-NOFP16-NEXT: fcvt h5, s18 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fadd s6, s6, s7 +; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v4.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h4, s16 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2 +; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v4.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h4, s6 +; CHECK-SD-NOFP16-NEXT: fadd s1, s5, s1 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h1, s1 +; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v1.h[0] +; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0] +; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fmuladd_v8f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h +; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmuladd_v8f16: +; CHECK-GI-NOFP16: // %bb.0: // 
%entry +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v0.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fmuladd_v8f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) + ret <8 x half> %d +} + +define <16 x half> @fmuladd_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fmuladd_v16f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h7, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s16, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h0 +; CHECK-SD-NOFP16-NEXT: mov h18, v2.h[2] +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h20, v2.h[3] +; CHECK-SD-NOFP16-NEXT: mov h21, v0.h[3] +; CHECK-SD-NOFP16-NEXT: mov h24, v3.h[1] +; CHECK-SD-NOFP16-NEXT: mov h25, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s27, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: mov h29, v1.h[4] +; CHECK-SD-NOFP16-NEXT: fmul s16, s17, s16 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; 
CHECK-SD-NOFP16-NEXT: mov h17, v4.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h24 +; CHECK-SD-NOFP16-NEXT: fcvt s25, h25 +; CHECK-SD-NOFP16-NEXT: mov h30, v1.h[6] +; CHECK-SD-NOFP16-NEXT: fmul s6, s7, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h4 +; CHECK-SD-NOFP16-NEXT: fmul s23, s19, s18 +; CHECK-SD-NOFP16-NEXT: fcvt h22, s16 +; CHECK-SD-NOFP16-NEXT: mov h18, v2.h[4] +; CHECK-SD-NOFP16-NEXT: fmul s20, s21, s20 +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4] +; CHECK-SD-NOFP16-NEXT: mov h16, v4.h[2] +; CHECK-SD-NOFP16-NEXT: fmul s24, s25, s24 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt h6, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h22 +; CHECK-SD-NOFP16-NEXT: fcvt h22, s23 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt h20, s20 +; CHECK-SD-NOFP16-NEXT: fcvt s23, h6 +; CHECK-SD-NOFP16-NEXT: mov h6, v4.h[3] +; CHECK-SD-NOFP16-NEXT: fadd s7, s21, s7 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h3 +; CHECK-SD-NOFP16-NEXT: fmul s18, s19, s18 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h20 +; CHECK-SD-NOFP16-NEXT: fadd s17, s23, s17 +; CHECK-SD-NOFP16-NEXT: mov h23, v3.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s28, h6 +; CHECK-SD-NOFP16-NEXT: fmul s22, s27, s22 +; CHECK-SD-NOFP16-NEXT: fcvt h6, s7 +; CHECK-SD-NOFP16-NEXT: fadd s7, s21, s16 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h26 +; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt h18, s18 +; CHECK-SD-NOFP16-NEXT: mov h27, v0.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt h16, s17 +; CHECK-SD-NOFP16-NEXT: mov h17, v4.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s20, h23 +; CHECK-SD-NOFP16-NEXT: fadd s19, s19, s28 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s7 +; CHECK-SD-NOFP16-NEXT: mov h23, v2.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fmul s20, s21, s20 +; 
CHECK-SD-NOFP16-NEXT: mov v6.h[1], v16.h[0] +; CHECK-SD-NOFP16-NEXT: mov h16, v5.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvt h21, s22 +; CHECK-SD-NOFP16-NEXT: fcvt h22, s24 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h25 +; CHECK-SD-NOFP16-NEXT: fcvt s25, h26 +; CHECK-SD-NOFP16-NEXT: mov h26, v5.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt h19, s19 +; CHECK-SD-NOFP16-NEXT: mov v6.h[2], v7.h[0] +; CHECK-SD-NOFP16-NEXT: mov h7, v3.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s28, h16 +; CHECK-SD-NOFP16-NEXT: fcvt h20, s20 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h22 +; CHECK-SD-NOFP16-NEXT: fadd s16, s18, s17 +; CHECK-SD-NOFP16-NEXT: fmul s18, s25, s24 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h5 +; CHECK-SD-NOFP16-NEXT: mov h24, v0.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt s25, h26 +; CHECK-SD-NOFP16-NEXT: fcvt s26, h27 +; CHECK-SD-NOFP16-NEXT: mov v6.h[3], v19.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h23 +; CHECK-SD-NOFP16-NEXT: mov h23, v2.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s27, h29 +; CHECK-SD-NOFP16-NEXT: fadd s22, s22, s28 +; CHECK-SD-NOFP16-NEXT: fadd s17, s21, s17 +; CHECK-SD-NOFP16-NEXT: fcvt h18, s18 +; CHECK-SD-NOFP16-NEXT: mov h21, v5.h[3] +; CHECK-SD-NOFP16-NEXT: mov h28, v3.h[5] +; CHECK-SD-NOFP16-NEXT: mov h29, v1.h[5] +; CHECK-SD-NOFP16-NEXT: fmul s19, s26, s19 +; CHECK-SD-NOFP16-NEXT: fcvt s23, h23 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h24 +; CHECK-SD-NOFP16-NEXT: fadd s20, s20, s25 +; CHECK-SD-NOFP16-NEXT: fmul s25, s27, s7 +; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt h22, s22 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h28 +; CHECK-SD-NOFP16-NEXT: fcvt s28, h29 +; CHECK-SD-NOFP16-NEXT: fmul s23, s24, s23 +; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7] +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt s24, h27 +; 
CHECK-SD-NOFP16-NEXT: fcvt s26, h30 +; CHECK-SD-NOFP16-NEXT: fcvt h20, s20 +; CHECK-SD-NOFP16-NEXT: mov v7.h[1], v22.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h22, s25 +; CHECK-SD-NOFP16-NEXT: mov h25, v5.h[4] +; CHECK-SD-NOFP16-NEXT: fadd s17, s17, s18 +; CHECK-SD-NOFP16-NEXT: fmul s18, s28, s21 +; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[7] +; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt h19, s19 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fmul s21, s26, s24 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: mov h24, v4.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s22, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s25, h25 +; CHECK-SD-NOFP16-NEXT: mov v7.h[2], v20.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h17, s17 +; CHECK-SD-NOFP16-NEXT: fcvt h18, s18 +; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt h23, s23 +; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s2 +; CHECK-SD-NOFP16-NEXT: mov h2, v4.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt h21, s21 +; CHECK-SD-NOFP16-NEXT: fadd s22, s22, s25 +; CHECK-SD-NOFP16-NEXT: mov h25, v5.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt h16, s16 +; CHECK-SD-NOFP16-NEXT: mov v7.h[3], v17.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt s17, h19 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h24 +; CHECK-SD-NOFP16-NEXT: fmul s1, s1, s3 +; CHECK-SD-NOFP16-NEXT: fcvt s3, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h23 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h21 +; CHECK-SD-NOFP16-NEXT: fcvt h22, s22 +; CHECK-SD-NOFP16-NEXT: fcvt s23, h25 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: fadd s17, s17, s19 +; CHECK-SD-NOFP16-NEXT: mov h4, v4.h[7] +; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[7] +; CHECK-SD-NOFP16-NEXT: fadd s3, s3, s18 +; CHECK-SD-NOFP16-NEXT: fcvt h1, s1 +; CHECK-SD-NOFP16-NEXT: mov v6.h[4], v16.h[0] +; CHECK-SD-NOFP16-NEXT: fadd s2, s20, s2 +; CHECK-SD-NOFP16-NEXT: mov v7.h[4], v22.h[0] +; 
CHECK-SD-NOFP16-NEXT: fadd s16, s21, s23 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt h17, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt h2, s2 +; CHECK-SD-NOFP16-NEXT: mov v6.h[5], v17.h[0] +; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s4 +; CHECK-SD-NOFP16-NEXT: mov v7.h[5], v3.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h3, s16 +; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s5 +; CHECK-SD-NOFP16-NEXT: mov v6.h[6], v2.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: mov v7.h[6], v3.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h1, s1 +; CHECK-SD-NOFP16-NEXT: mov v6.h[7], v0.h[0] +; CHECK-SD-NOFP16-NEXT: mov v7.h[7], v1.h[0] +; CHECK-SD-NOFP16-NEXT: mov v0.16b, v6.16b +; CHECK-SD-NOFP16-NEXT: mov v1.16b, v7.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fmuladd_v16f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h +; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h +; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b +; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmuladd_v16f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h +; CHECK-GI-NOFP16-NEXT: fmul v6.4s, v6.4s, v7.4s +; CHECK-GI-NOFP16-NEXT: fmul v7.4s, v16.4s, v17.4s +; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v6.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v4.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h +; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v7.4s 
+; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v6.4s +; CHECK-GI-NOFP16-NEXT: fadd v3.4s, v3.4s, v7.4s +; CHECK-GI-NOFP16-NEXT: fadd v4.4s, v0.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: fadd v5.4s, v1.4s, v5.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fmuladd_v16f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h +; CHECK-GI-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h +; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b +; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %d = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) + ret <16 x half> %d +} + +define double @fmul_f64(double %a, double %b, double %c) { +; CHECK-LABEL: fmul_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmadd d0, d0, d1, d2 +; CHECK-NEXT: ret +entry: + %d = fmul fast double %a, %b + %e = fadd fast double %d, %c + ret double %e +} + +define float @fmul_f32(float %a, float %b, float %c) { +; CHECK-LABEL: fmul_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmadd s0, s0, s1, s2 +; CHECK-NEXT: ret +entry: + %d = fmul fast float %a, %b + %e = fadd fast float %d, %c + ret float %e +} + +define half @fmul_f16(half %a, half %b, half %c) { +; CHECK-SD-NOFP16-LABEL: fmul_f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h2 +; CHECK-SD-NOFP16-NEXT: 
fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fmul_f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmul_f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h2 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fmul_f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-GI-FP16-NEXT: ret +entry: + %d = fmul fast half %a, %b + %e = fadd fast half %d, %c + ret half %e +} + +define <2 x double> @fmul_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-SD-LABEL: fmul_v2f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmla v2.2d, v1.2d, v0.2d +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_v2f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmla v2.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: ret +entry: + %d = fmul fast <2 x double> %a, %b + %e = fadd fast <2 x double> %d, %c + ret <2 x double> %e +} + +define <3 x double> @fmul_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) { +; CHECK-SD-LABEL: fmul_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6 +; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7 +; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def 
$q5 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v3.d[1], v4.d[0] +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: mov v6.d[1], v7.d[0] +; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d +; CHECK-SD-NEXT: ldr d3, [sp] +; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d +; CHECK-SD-NEXT: fmov d0, d6 +; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: fmov d2, d3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4 +; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-GI-NEXT: mov v6.d[1], v7.d[0] +; CHECK-GI-NEXT: fmla v6.2d, v0.2d, v3.2d +; CHECK-GI-NEXT: ldr d0, [sp] +; CHECK-GI-NEXT: fmadd d2, d2, d5, d0 +; CHECK-GI-NEXT: mov d1, v6.d[1] +; CHECK-GI-NEXT: fmov d0, d6 +; CHECK-GI-NEXT: ret +entry: + %d = fmul fast <3 x double> %a, %b + %e = fadd fast <3 x double> %d, %c + ret <3 x double> %e +} + +define <4 x double> @fmul_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) { +; CHECK-SD-LABEL: fmul_v4f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmla v4.2d, v2.2d, v0.2d +; CHECK-SD-NEXT: fmla v5.2d, v3.2d, v1.2d +; CHECK-SD-NEXT: mov v0.16b, v4.16b +; CHECK-SD-NEXT: mov v1.16b, v5.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_v4f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmla v4.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: fmla v5.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b +; CHECK-GI-NEXT: ret +entry: + %d = fmul fast <4 x double> %a, %b + %e = fadd fast <4 x double> %d, %c + ret <4 x double> %e +} + +define <2 x 
float> @fmul_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; CHECK-SD-LABEL: fmul_v2f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmla v2.2s, v1.2s, v0.2s +; CHECK-SD-NEXT: fmov d0, d2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_v2f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmla v2.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: fmov d0, d2 +; CHECK-GI-NEXT: ret +entry: + %d = fmul fast <2 x float> %a, %b + %e = fadd fast <2 x float> %d, %c + ret <2 x float> %e +} + +define <3 x float> @fmul_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) { +; CHECK-SD-LABEL: fmul_v3f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmla v2.4s, v1.4s, v0.4s +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_v3f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmla v2.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: ret +entry: + %d = fmul fast <3 x float> %a, %b + %e = fadd fast <3 x float> %d, %c + ret <3 x float> %e +} + +define <4 x float> @fmul_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-SD-LABEL: fmul_v4f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmla v2.4s, v1.4s, v0.4s +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_v4f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmla v2.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: ret +entry: + %d = fmul fast <4 x float> %a, %b + %e = fadd fast <4 x float> %d, %c + ret <4 x float> %e +} + +define <8 x float> @fmul_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) { +; CHECK-SD-LABEL: fmul_v8f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmla v4.4s, v2.4s, v0.4s +; CHECK-SD-NEXT: fmla v5.4s, v3.4s, v1.4s +; CHECK-SD-NEXT: mov v0.16b, v4.16b +; CHECK-SD-NEXT: mov v1.16b, v5.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_v8f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmla v4.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: 
fmla v5.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b +; CHECK-GI-NEXT: ret +entry: + %d = fmul fast <8 x float> %a, %b + %e = fadd fast <8 x float> %d, %c + ret <8 x float> %e +} + +define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fmul_v7f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s5, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h0 +; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2] +; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h17, v1.h[3] +; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3] +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4] +; CHECK-SD-NOFP16-NEXT: mov h20, v0.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s21, h2 +; CHECK-SD-NOFP16-NEXT: mov h22, v2.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h16 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h18 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fmul s3, s4, s3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h17 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt h5, s5 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: fmul s4, s6, s4 +; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[5] +; CHECK-SD-NOFP16-NEXT: fmul s7, s16, s7 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h19 +; CHECK-SD-NOFP16-NEXT: mov h19, v2.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s4 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s7 +; CHECK-SD-NOFP16-NEXT: fadd s5, s5, s21 +; CHECK-SD-NOFP16-NEXT: mov h21, v0.h[6] +; CHECK-SD-NOFP16-NEXT: fmul s17, s18, s17 +; CHECK-SD-NOFP16-NEXT: mov 
h18, v1.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7] +; CHECK-SD-NOFP16-NEXT: fadd s3, s3, s16 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmul s6, s20, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: mov h20, v1.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt h1, s5 +; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: fadd s4, s4, s16 +; CHECK-SD-NOFP16-NEXT: fcvt h16, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h21 +; CHECK-SD-NOFP16-NEXT: fadd s7, s7, s19 +; CHECK-SD-NOFP16-NEXT: fcvt h6, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0] +; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s4 +; CHECK-SD-NOFP16-NEXT: fmul s17, s18, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s7 +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fadd s5, s16, s5 +; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v4.h[0] +; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[6] +; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s18 +; CHECK-SD-NOFP16-NEXT: fcvt h16, s17 +; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7] +; CHECK-SD-NOFP16-NEXT: fadd s3, s6, s3 +; CHECK-SD-NOFP16-NEXT: fcvt h5, s5 +; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v7.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h16 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v5.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fadd s4, s6, s4 +; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0] +; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s4 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: mov v1.h[6], 
v3.h[0] +; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0] +; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fmul_v7f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h +; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmul_v7f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h5, v1.h[4] +; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[5] +; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6] +; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v4.h[0] +; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v6.h[0] +; CHECK-GI-NOFP16-NEXT: fmul v4.4s, v7.4s, v16.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v1.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v4.4s +; CHECK-GI-NOFP16-NEXT: mov h1, v2.h[4] +; CHECK-GI-NOFP16-NEXT: mov h4, v2.h[5] +; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[6] +; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v5.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v6.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] +; CHECK-GI-NOFP16-NEXT: fmul v2.4s, v3.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[0] +; 
CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0] +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fmul_v7f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v2.8h, v0.8h, v1.8h +; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %d = fmul fast <7 x half> %a, %b + %e = fadd fast <7 x half> %d, %c + ret <7 x half> %e +} + +define <4 x half> @fmul_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fmul_v4f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h +; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fmul_v4f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h +; CHECK-SD-FP16-NEXT: fmov d0, d2 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmul_v4f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; 
CHECK-GI-FP16-LABEL: fmul_v4f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v2.4h, v0.4h, v1.4h +; CHECK-GI-FP16-NEXT: fmov d0, d2 +; CHECK-GI-FP16-NEXT: ret +entry: + %d = fmul fast <4 x half> %a, %b + %e = fadd fast <4 x half> %d, %c + ret <4 x half> %e +} + +define <8 x half> @fmul_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fmul_v8f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s5, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h0 +; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2] +; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h17, v1.h[3] +; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3] +; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4] +; CHECK-SD-NOFP16-NEXT: mov h20, v0.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s21, h2 +; CHECK-SD-NOFP16-NEXT: mov h22, v2.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h16 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h18 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fmul s3, s4, s3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h17 +; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt h5, s5 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: fmul s4, s6, s4 +; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[5] +; CHECK-SD-NOFP16-NEXT: fmul s7, s16, s7 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h19 +; CHECK-SD-NOFP16-NEXT: mov h19, v2.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s4 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s7 +; CHECK-SD-NOFP16-NEXT: fadd s5, s5, s21 +; CHECK-SD-NOFP16-NEXT: mov h21, v0.h[6] +; 
CHECK-SD-NOFP16-NEXT: fmul s17, s18, s17 +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7] +; CHECK-SD-NOFP16-NEXT: fadd s3, s3, s16 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmul s6, s20, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: mov h20, v1.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt h1, s5 +; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: fadd s4, s4, s16 +; CHECK-SD-NOFP16-NEXT: fcvt h16, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h21 +; CHECK-SD-NOFP16-NEXT: fadd s7, s7, s19 +; CHECK-SD-NOFP16-NEXT: fcvt h6, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0] +; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt h4, s4 +; CHECK-SD-NOFP16-NEXT: fmul s17, s18, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s7 +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fadd s5, s16, s5 +; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v4.h[0] +; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[6] +; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s18 +; CHECK-SD-NOFP16-NEXT: fcvt h16, s17 +; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7] +; CHECK-SD-NOFP16-NEXT: fadd s3, s6, s3 +; CHECK-SD-NOFP16-NEXT: fcvt h5, s5 +; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v7.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h16 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v5.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fadd s4, s6, s4 +; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0] +; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s4 +; 
CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v3.h[0] +; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0] +; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fmul_v8f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h +; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmul_v8f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v0.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fmul_v8f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fmla v2.8h, v0.8h, v1.8h +; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %d = fmul fast <8 x half> %a, %b + %e = fadd fast <8 x half> %d, %c + ret <8 x half> %e +} + +define <16 x half> @fmul_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) { +; CHECK-SD-NOFP16-LABEL: fmul_v16f16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: stp d11, d10, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SD-NOFP16-NEXT: .cfi_offset b8, -8 +; CHECK-SD-NOFP16-NEXT: .cfi_offset b9, -16 +; CHECK-SD-NOFP16-NEXT: .cfi_offset b10, -24 +; CHECK-SD-NOFP16-NEXT: .cfi_offset b11, -32 +; CHECK-SD-NOFP16-NEXT: mov h6, v3.h[7] +; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[7] +; CHECK-SD-NOFP16-NEXT: mov h20, v3.h[6] +; CHECK-SD-NOFP16-NEXT: mov h22, v1.h[6] +; CHECK-SD-NOFP16-NEXT: mov h23, v3.h[5] +; CHECK-SD-NOFP16-NEXT: mov h24, v3.h[2] +; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[2] +; CHECK-SD-NOFP16-NEXT: mov h17, v3.h[3] +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3] +; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[1] +; CHECK-SD-NOFP16-NEXT: mov h28, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvt s25, h16 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h23 +; CHECK-SD-NOFP16-NEXT: fcvt s23, h24 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h26 +; CHECK-SD-NOFP16-NEXT: mov h26, v0.h[1] +; CHECK-SD-NOFP16-NEXT: mov h19, v3.h[4] +; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s29, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fmul s6, s25, s6 +; CHECK-SD-NOFP16-NEXT: mov h25, v2.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s27, h27 +; CHECK-SD-NOFP16-NEXT: fmul s3, s22, s21 +; CHECK-SD-NOFP16-NEXT: fcvt s28, h28 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fmul s23, s24, s23 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h0 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h25 +; CHECK-SD-NOFP16-NEXT: fcvt s25, h26 +; CHECK-SD-NOFP16-NEXT: fmul s26, s1, s29 +; CHECK-SD-NOFP16-NEXT: fmul s27, s28, s27 +; CHECK-SD-NOFP16-NEXT: mov h28, v2.h[7] +; 
CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fmul s1, s18, s17 +; CHECK-SD-NOFP16-NEXT: fcvt h17, s23 +; CHECK-SD-NOFP16-NEXT: mov h29, v4.h[1] +; CHECK-SD-NOFP16-NEXT: fmul s21, s22, s21 +; CHECK-SD-NOFP16-NEXT: fmul s16, s16, s19 +; CHECK-SD-NOFP16-NEXT: mov h8, v2.h[6] +; CHECK-SD-NOFP16-NEXT: fmul s23, s25, s24 +; CHECK-SD-NOFP16-NEXT: mov h24, v2.h[2] +; CHECK-SD-NOFP16-NEXT: mov h25, v0.h[2] +; CHECK-SD-NOFP16-NEXT: fmul s7, s7, s20 +; CHECK-SD-NOFP16-NEXT: fcvt h18, s26 +; CHECK-SD-NOFP16-NEXT: fcvt h19, s27 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h28 +; CHECK-SD-NOFP16-NEXT: mov h26, v2.h[3] +; CHECK-SD-NOFP16-NEXT: mov h27, v0.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt h21, s21 +; CHECK-SD-NOFP16-NEXT: mov h20, v0.h[7] +; CHECK-SD-NOFP16-NEXT: mov h11, v2.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h28, s23 +; CHECK-SD-NOFP16-NEXT: fcvt s30, h24 +; CHECK-SD-NOFP16-NEXT: fcvt s31, h25 +; CHECK-SD-NOFP16-NEXT: mov h24, v0.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt s29, h29 +; CHECK-SD-NOFP16-NEXT: mov h9, v0.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt s25, h26 +; CHECK-SD-NOFP16-NEXT: fcvt s26, h27 +; CHECK-SD-NOFP16-NEXT: mov h10, v2.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: mov h23, v0.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h11 +; CHECK-SD-NOFP16-NEXT: fmul s27, s31, s30 +; CHECK-SD-NOFP16-NEXT: fcvt s28, h28 +; CHECK-SD-NOFP16-NEXT: fcvt s30, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s31, h4 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h8 +; CHECK-SD-NOFP16-NEXT: mov h8, v5.h[1] +; CHECK-SD-NOFP16-NEXT: fmul s25, s26, s25 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h24 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; CHECK-SD-NOFP16-NEXT: fmul s2, s20, s22 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h9 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h10 +; CHECK-SD-NOFP16-NEXT: fadd s26, s28, s29 +; CHECK-SD-NOFP16-NEXT: fcvt s23, h23 +; CHECK-SD-NOFP16-NEXT: fcvt h27, s27 +; CHECK-SD-NOFP16-NEXT: fadd s28, s30, s31 +; CHECK-SD-NOFP16-NEXT: mov h29, v4.h[2] +; CHECK-SD-NOFP16-NEXT: mov h30, v5.h[2] +; 
CHECK-SD-NOFP16-NEXT: fmul s24, s24, s0 +; CHECK-SD-NOFP16-NEXT: fcvt s31, h8 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fmul s21, s22, s21 +; CHECK-SD-NOFP16-NEXT: fcvt s8, h5 +; CHECK-SD-NOFP16-NEXT: fcvt h25, s25 +; CHECK-SD-NOFP16-NEXT: fmul s20, s23, s20 +; CHECK-SD-NOFP16-NEXT: fcvt h26, s26 +; CHECK-SD-NOFP16-NEXT: fcvt s27, h27 +; CHECK-SD-NOFP16-NEXT: fcvt h0, s28 +; CHECK-SD-NOFP16-NEXT: mov h28, v4.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s29, h29 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvt s30, h30 +; CHECK-SD-NOFP16-NEXT: fadd s19, s19, s31 +; CHECK-SD-NOFP16-NEXT: fadd s18, s18, s8 +; CHECK-SD-NOFP16-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NOFP16-NEXT: fcvt h22, s1 +; CHECK-SD-NOFP16-NEXT: mov h23, v5.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s25, h25 +; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v26.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt s26, h28 +; CHECK-SD-NOFP16-NEXT: fadd s27, s27, s29 +; CHECK-SD-NOFP16-NEXT: fcvt h24, s24 +; CHECK-SD-NOFP16-NEXT: fadd s17, s17, s30 +; CHECK-SD-NOFP16-NEXT: mov h28, v4.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h19, s19 +; CHECK-SD-NOFP16-NEXT: fcvt h1, s18 +; CHECK-SD-NOFP16-NEXT: fcvt h16, s16 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h22 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h23 +; CHECK-SD-NOFP16-NEXT: fcvt h20, s20 +; CHECK-SD-NOFP16-NEXT: fadd s23, s25, s26 +; CHECK-SD-NOFP16-NEXT: mov h25, v5.h[4] +; CHECK-SD-NOFP16-NEXT: fcvt h26, s27 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h24 +; CHECK-SD-NOFP16-NEXT: fcvt s27, h28 +; CHECK-SD-NOFP16-NEXT: fcvt h7, s7 +; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v19.h[0] +; CHECK-SD-NOFP16-NEXT: mov h19, v4.h[5] +; CHECK-SD-NOFP16-NEXT: fcvt h17, s17 +; CHECK-SD-NOFP16-NEXT: fadd s18, s18, s22 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt h21, s21 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h25 +; CHECK-SD-NOFP16-NEXT: mov h25, v5.h[5] +; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v26.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h23, s23 +; 
CHECK-SD-NOFP16-NEXT: fadd s24, s24, s27 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h19 +; CHECK-SD-NOFP16-NEXT: mov h26, v4.h[6] +; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v17.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h17, s18 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: fadd s16, s16, s22 +; CHECK-SD-NOFP16-NEXT: fcvt s18, h25 +; CHECK-SD-NOFP16-NEXT: mov h22, v5.h[6] +; CHECK-SD-NOFP16-NEXT: fcvt h6, s6 +; CHECK-SD-NOFP16-NEXT: fcvt h2, s2 +; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v23.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h23, s24 +; CHECK-SD-NOFP16-NEXT: fadd s19, s20, s19 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h26 +; CHECK-SD-NOFP16-NEXT: mov h4, v4.h[7] +; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v17.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h16, s16 +; CHECK-SD-NOFP16-NEXT: fadd s7, s7, s18 +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h22 +; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[7] +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: mov v0.h[4], v23.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h18, s19 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fadd s19, s20, s21 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v16.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h7, s7 +; CHECK-SD-NOFP16-NEXT: fadd s3, s3, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: mov v0.h[5], v18.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h16, s19 +; CHECK-SD-NOFP16-NEXT: fadd s2, s2, s4 +; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v7.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h3, s3 +; CHECK-SD-NOFP16-NEXT: fadd s4, s6, s5 +; CHECK-SD-NOFP16-NEXT: mov v0.h[6], v16.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h2, s2 +; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v3.h[0] +; CHECK-SD-NOFP16-NEXT: fcvt h3, s4 +; CHECK-SD-NOFP16-NEXT: mov v0.h[7], v2.h[0] +; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v3.h[0] +; CHECK-SD-NOFP16-NEXT: ldp d11, d10, [sp], #32 // 16-byte Folded 
Reload +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fmul_v16f16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h +; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h +; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b +; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmul_v16f16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h +; CHECK-GI-NOFP16-NEXT: fmul v6.4s, v6.4s, v7.4s +; CHECK-GI-NOFP16-NEXT: fmul v7.4s, v16.4s, v17.4s +; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v6.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v4.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h +; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v7.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v6.4s +; CHECK-GI-NOFP16-NEXT: fadd v3.4s, v3.4s, v7.4s +; CHECK-GI-NOFP16-NEXT: fadd v4.4s, v0.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: fadd v5.4s, v1.4s, v5.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fmul_v16f16: +; CHECK-GI-FP16: // %bb.0: // %entry +; 
CHECK-GI-FP16-NEXT: fmla v4.8h, v0.8h, v2.8h +; CHECK-GI-FP16-NEXT: fmla v5.8h, v1.8h, v3.8h +; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b +; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %d = fmul fast <16 x half> %a, %b + %e = fadd fast <16 x half> %d, %c + ret <16 x half> %e +} + +declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>) +declare <16 x half> @llvm.fmuladd.v16f16(<16 x half>, <16 x half>, <16 x half>) +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) +declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) +declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) +declare <3 x double> @llvm.fma.v3f64(<3 x double>, <3 x double>, <3 x double>) +declare <3 x double> @llvm.fmuladd.v3f64(<3 x double>, <3 x double>, <3 x double>) +declare <3 x float> @llvm.fma.v3f32(<3 x float>, <3 x float>, <3 x float>) +declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) +declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>) +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) +declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) +declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) +declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) +declare <7 x half> @llvm.fma.v7f16(<7 x half>, <7 x half>, <7 x half>) +declare <7 x half> @llvm.fmuladd.v7f16(<7 x half>, <7 x half>, <7 x half>) +declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) +declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) +declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) +declare <8 x half> @llvm.fmuladd.v8f16(<8 x 
half>, <8 x half>, <8 x half>) +declare double @llvm.fma.f64(double, double, double) +declare double @llvm.fmuladd.f64(double, double, double) +declare float @llvm.fma.f32(float, float, float) +declare float @llvm.fmuladd.f32(float, float, float) +declare half @llvm.fma.f16(half, half, half) +declare half @llvm.fmuladd.f16(half, half, half) From e7a6171c4c0486aae051543e202d74c04178cfac Mon Sep 17 00:00:00 2001 From: Carlos Galvez Date: Sat, 14 Oct 2023 14:19:20 +0200 Subject: [PATCH 133/720] =?UTF-8?q?[clang]=20Enable=20Wenum-constexpr-conv?= =?UTF-8?q?ersion=20also=20in=20system=20headers=20and=20=E2=80=A6=20(#675?= =?UTF-8?q?28)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …macros As per review comments on https://reviews.llvm.org/D150226, we should allow for one more release before turning this warning into a hard error, by making it visible in system headers and macros, so that people are aware of it and can work on it. --- clang/docs/ReleaseNotes.rst | 4 ++++ .../include/clang/Basic/DiagnosticASTKinds.td | 3 ++- .../enum-constexpr-conversion-system-header.h | 19 +++++++++++++++++++ .../SemaCXX/constant-expression-cxx11.cpp | 11 ++++++++--- 4 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/Inputs/enum-constexpr-conversion-system-header.h diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1eebf5ea6b3e3..ade3c33b3b944 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -96,6 +96,10 @@ C++ Specific Potentially Breaking Changes Clang as a compiler, but it may break assumptions in Clang-based tools iterating over the AST. +- The warning `-Wenum-constexpr-conversion` is now also enabled by default on + system headers and macros. It will be turned into a hard (non-downgradable) + error in the next Clang release. 
+ ABI Changes in This Version --------------------------- - Following the SystemV ABI for x86-64, ``__int128`` arguments will no longer diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index d2656310e79c9..0019553233fde 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -405,7 +405,8 @@ def warn_fixedpoint_constant_overflow : Warning< InGroup>; def warn_constexpr_unscoped_enum_out_of_range : Warning< "integer value %0 is outside the valid range of values [%1, %2] for the " - "enumeration type %3">, DefaultError, InGroup>; + "enumeration type %3">, DefaultError, ShowInSystemHeader, ShowInSystemMacro, + InGroup>; // This is a temporary diagnostic, and shall be removed once our // implementation is complete, and like the preceding constexpr notes belongs diff --git a/clang/test/SemaCXX/Inputs/enum-constexpr-conversion-system-header.h b/clang/test/SemaCXX/Inputs/enum-constexpr-conversion-system-header.h new file mode 100644 index 0000000000000..0850f3405eed3 --- /dev/null +++ b/clang/test/SemaCXX/Inputs/enum-constexpr-conversion-system-header.h @@ -0,0 +1,19 @@ +// System header for testing that -Wenum-constexpr-conversion leads to an error +// when included in user code, or when the system macro is used. 
+ +enum SystemEnum +{ + a = 0, + b = 1, +}; + +void testValueInRangeOfEnumerationValuesInSystemHeader() +{ + constexpr SystemEnum x1 = static_cast(123); + // expected-error@-1 {{integer value 123 is outside the valid range of values [0, 1] for the enumeration type 'SystemEnum'}} + + const SystemEnum x2 = static_cast(123); // ok, not a constant expression context +} + +#define CONSTEXPR_CAST_TO_SYSTEM_ENUM_OUTSIDE_OF_RANGE \ + constexpr SystemEnum system_enum = static_cast(123) diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp index 89d1b3ea6de05..8fb994224853b 100644 --- a/clang/test/SemaCXX/constant-expression-cxx11.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify=expected,cxx20_23,cxx23 -triple x86_64-linux -Wno-string-plus-int -Wno-pointer-arith -Wno-zero-length-array -Wno-c99-designator -fcxx-exceptions -pedantic %s -Wno-comment -Wno-tautological-pointer-compare -Wno-bool-conversion -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,cxx11_20,cxx20_23 -triple x86_64-linux -Wno-string-plus-int -Wno-pointer-arith -Wno-zero-length-array -Wno-c99-designator -fcxx-exceptions -pedantic %s -Wno-comment -Wno-tautological-pointer-compare -Wno-bool-conversion -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify=expected,cxx11_20,cxx11 -triple x86_64-linux -Wno-string-plus-int -Wno-pointer-arith -Wno-zero-length-array -Wno-c99-designator -fcxx-exceptions -pedantic %s -Wno-comment -Wno-tautological-pointer-compare -Wno-bool-conversion +// RUN: %clang_cc1 -std=c++23 -isystem %S/Inputs -fsyntax-only -verify=expected,cxx20_23,cxx23 -triple x86_64-linux -Wno-string-plus-int -Wno-pointer-arith -Wno-zero-length-array -Wno-c99-designator -fcxx-exceptions -pedantic %s -Wno-comment -Wno-tautological-pointer-compare -Wno-bool-conversion +// RUN: %clang_cc1 -std=c++20 -isystem %S/Inputs -fsyntax-only -verify=expected,cxx11_20,cxx20_23 
-triple x86_64-linux -Wno-string-plus-int -Wno-pointer-arith -Wno-zero-length-array -Wno-c99-designator -fcxx-exceptions -pedantic %s -Wno-comment -Wno-tautological-pointer-compare -Wno-bool-conversion +// RUN: %clang_cc1 -std=c++11 -isystem %S/Inputs -fsyntax-only -verify=expected,cxx11_20,cxx11 -triple x86_64-linux -Wno-string-plus-int -Wno-pointer-arith -Wno-zero-length-array -Wno-c99-designator -fcxx-exceptions -pedantic %s -Wno-comment -Wno-tautological-pointer-compare -Wno-bool-conversion namespace StaticAssertFoldTest { @@ -2449,6 +2449,8 @@ E2 testDefaultArgForParam(E2 e2Param = (E2)-1) { // ok, not a constant expressio return e2LocalInit; } +#include + void testValueInRangeOfEnumerationValues() { constexpr E1 x1 = static_cast(-8); constexpr E1 x2 = static_cast(8); @@ -2486,6 +2488,9 @@ void testValueInRangeOfEnumerationValues() { // expected-error@-1 {{integer value 2147483648 is outside the valid range of values [-2147483648, 2147483647] for the enumeration type 'EMaxInt'}} const NumberType neg_one = (NumberType) ((NumberType) 0 - (NumberType) 1); // ok, not a constant expression context + + CONSTEXPR_CAST_TO_SYSTEM_ENUM_OUTSIDE_OF_RANGE; + // expected-error@-1 {{integer value 123 is outside the valid range of values [0, 1] for the enumeration type 'SystemEnum'}} } template struct Bitfield { From 5e1c2bf3e6fca35ee0445b2a81d47e8576024186 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 14 Oct 2023 13:24:28 +0100 Subject: [PATCH 134/720] [AArch64][GlobalISel] Expand converage of FMA. This moves the legalization of G_FMA to the action builder that can handle more types. The existing arm64-vfloatintrinsics.ll has been removed as they are covered in other test files. 
--- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 10 +- .../AArch64/GlobalISel/legalize-fma.mir | 218 ++++---- .../GlobalISel/legalizer-info-validation.mir | 2 +- .../CodeGen/AArch64/arm64-vfloatintrinsics.ll | 514 ------------------ llvm/test/CodeGen/AArch64/fmla.ll | 514 ++++++------------ 5 files changed, 250 insertions(+), 1008 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 378a8d0da4925..d2f855f407530 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -229,10 +229,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampScalar(1, s32, s64) .widenScalarToNextPow2(0); - getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG, G_FABS, - G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM, - G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, - G_FNEARBYINT, G_INTRINSIC_TRUNC, + getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG, + G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM, + G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, + G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN}) .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64}) .legalIf([=](const LegalityQuery &Query) { @@ -251,7 +251,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .minScalar(0, s32) .scalarize(0); - getActionDefinitionsBuilder({G_FMA, G_INTRINSIC_LRINT}) + getActionDefinitionsBuilder(G_INTRINSIC_LRINT) // If we don't have full FP16 support, then scalarize the elements of // vectors containing fp16 types. 
.fewerElementsIf( diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fma.mir index 3388ab97dc335..d344511010b21 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fma.mir @@ -13,43 +13,27 @@ body: | ; NO-FP16-LABEL: name: test_v4f16.fma ; NO-FP16: liveins: $d0, $d1, $d2 - ; NO-FP16: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; NO-FP16: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1 - ; NO-FP16: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $d2 - ; NO-FP16: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; NO-FP16: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; NO-FP16: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; NO-FP16: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16) - ; NO-FP16: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; NO-FP16: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; NO-FP16: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; NO-FP16: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; NO-FP16: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16) - ; NO-FP16: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; NO-FP16: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; NO-FP16: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; NO-FP16: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16) - ; NO-FP16: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16) - ; NO-FP16: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] - ; NO-FP16: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[FMA2]](s32) - ; NO-FP16: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; NO-FP16: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16) - ; NO-FP16: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16) - ; NO-FP16: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] - ; NO-FP16: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) - ; NO-FP16: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; NO-FP16: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) - ; NO-FP16: RET_ReallyLR implicit $d0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1 + ; NO-FP16-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $d2 + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY1]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY2]](<4 x s16>) + ; NO-FP16-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FMA]](<4 x s32>) + ; NO-FP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $d0 + ; ; FP16-LABEL: name: test_v4f16.fma ; FP16: liveins: $d0, $d1, $d2 - ; FP16: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; FP16: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1 - ; FP16: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $d2 - ; FP16: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; FP16: $d0 = COPY [[FMA]](<4 x s16>) - ; FP16: RET_ReallyLR implicit $d0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1 + ; FP16-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $d2 + ; FP16-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; FP16-NEXT: $d0 = COPY [[FMA]](<4 x s16>) + ; 
FP16-NEXT: RET_ReallyLR implicit $d0 %0:_(<4 x s16>) = COPY $d0 %1:_(<4 x s16>) = COPY $d1 %2:_(<4 x s16>) = COPY $d2 @@ -69,63 +53,36 @@ body: | ; NO-FP16-LABEL: name: test_v8f16.fma ; NO-FP16: liveins: $q0, $q1, $q2 - ; NO-FP16: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; NO-FP16: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 - ; NO-FP16: [[COPY2:%[0-9]+]]:_(<8 x s16>) = COPY $q2 - ; NO-FP16: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; NO-FP16: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<8 x s16>) - ; NO-FP16: [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](<8 x s16>) - ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16) - ; NO-FP16: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV16]](s16) - ; NO-FP16: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; NO-FP16: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; NO-FP16: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; NO-FP16: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16) - ; NO-FP16: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV17]](s16) - ; NO-FP16: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; NO-FP16: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; NO-FP16: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; NO-FP16: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16) - ; NO-FP16: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[UV18]](s16) - ; NO-FP16: 
[[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] - ; NO-FP16: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; NO-FP16: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; NO-FP16: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16) - ; NO-FP16: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[UV19]](s16) - ; NO-FP16: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] - ; NO-FP16: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) - ; NO-FP16: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; NO-FP16: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[UV12]](s16) - ; NO-FP16: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[UV20]](s16) - ; NO-FP16: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FPEXT12]], [[FPEXT13]], [[FPEXT14]] - ; NO-FP16: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA4]](s32) - ; NO-FP16: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; NO-FP16: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[UV13]](s16) - ; NO-FP16: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[UV21]](s16) - ; NO-FP16: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FPEXT15]], [[FPEXT16]], [[FPEXT17]] - ; NO-FP16: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA5]](s32) - ; NO-FP16: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16) - ; NO-FP16: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[UV14]](s16) - ; NO-FP16: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[UV22]](s16) - ; NO-FP16: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FPEXT18]], [[FPEXT19]], [[FPEXT20]] - ; NO-FP16: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA6]](s32) - ; NO-FP16: [[FPEXT21:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16) - ; NO-FP16: [[FPEXT22:%[0-9]+]]:_(s32) = G_FPEXT [[UV15]](s16) - ; NO-FP16: [[FPEXT23:%[0-9]+]]:_(s32) = G_FPEXT [[UV23]](s16) - ; NO-FP16: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FPEXT21]], [[FPEXT22]], [[FPEXT23]] - ; NO-FP16: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA7]](s32) - ; NO-FP16: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), 
[[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16) - ; NO-FP16: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>) - ; NO-FP16: RET_ReallyLR implicit $q0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 + ; NO-FP16-NEXT: [[COPY2:%[0-9]+]]:_(<8 x s16>) = COPY $q2 + ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; NO-FP16-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY1]](<8 x s16>) + ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV2]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV3]](<4 x s16>) + ; NO-FP16-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY2]](<8 x s16>) + ; NO-FP16-NEXT: [[FPEXT4:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV4]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT5:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV5]](<4 x s16>) + ; NO-FP16-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[FPEXT]], [[FPEXT2]], [[FPEXT4]] + ; NO-FP16-NEXT: [[FMA1:%[0-9]+]]:_(<4 x s32>) = G_FMA [[FPEXT1]], [[FPEXT3]], [[FPEXT5]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FMA]](<4 x s32>) + ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FMA1]](<4 x s32>) + ; NO-FP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>) + ; NO-FP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 + ; ; FP16-LABEL: name: test_v8f16.fma ; FP16: liveins: $q0, $q1, $q2 - ; FP16: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; FP16: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 - ; FP16: [[COPY2:%[0-9]+]]:_(<8 x s16>) = COPY $q2 - ; FP16: 
[[FMA:%[0-9]+]]:_(<8 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; FP16: $q0 = COPY [[FMA]](<8 x s16>) - ; FP16: RET_ReallyLR implicit $q0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 + ; FP16-NEXT: [[COPY2:%[0-9]+]]:_(<8 x s16>) = COPY $q2 + ; FP16-NEXT: [[FMA:%[0-9]+]]:_(<8 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; FP16-NEXT: $q0 = COPY [[FMA]](<8 x s16>) + ; FP16-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 %1:_(<8 x s16>) = COPY $q1 %2:_(<8 x s16>) = COPY $q2 @@ -145,20 +102,23 @@ body: | ; NO-FP16-LABEL: name: test_v2f32.fma ; NO-FP16: liveins: $d0, $d1, $d2 - ; NO-FP16: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; NO-FP16: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 - ; NO-FP16: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2 - ; NO-FP16: [[FMA:%[0-9]+]]:_(<2 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; NO-FP16: $d0 = COPY [[FMA]](<2 x s32>) - ; NO-FP16: RET_ReallyLR implicit $d0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; NO-FP16-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2 + ; NO-FP16-NEXT: [[FMA:%[0-9]+]]:_(<2 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; NO-FP16-NEXT: $d0 = COPY [[FMA]](<2 x s32>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $d0 + ; ; FP16-LABEL: name: test_v2f32.fma ; FP16: liveins: $d0, $d1, $d2 - ; FP16: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; FP16: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 - ; FP16: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2 - ; FP16: [[FMA:%[0-9]+]]:_(<2 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; FP16: $d0 = COPY [[FMA]](<2 x s32>) - ; FP16: RET_ReallyLR implicit $d0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; FP16-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2 + ; FP16-NEXT: 
[[FMA:%[0-9]+]]:_(<2 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; FP16-NEXT: $d0 = COPY [[FMA]](<2 x s32>) + ; FP16-NEXT: RET_ReallyLR implicit $d0 %0:_(<2 x s32>) = COPY $d0 %1:_(<2 x s32>) = COPY $d1 %2:_(<2 x s32>) = COPY $d2 @@ -178,20 +138,23 @@ body: | ; NO-FP16-LABEL: name: test_v4f32.fma ; NO-FP16: liveins: $q0, $q1, $q2 - ; NO-FP16: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; NO-FP16: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; NO-FP16: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 - ; NO-FP16: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; NO-FP16: $q0 = COPY [[FMA]](<4 x s32>) - ; NO-FP16: RET_ReallyLR implicit $q0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; NO-FP16-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; NO-FP16-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; NO-FP16-NEXT: $q0 = COPY [[FMA]](<4 x s32>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 + ; ; FP16-LABEL: name: test_v4f32.fma ; FP16: liveins: $q0, $q1, $q2 - ; FP16: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; FP16: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; FP16: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 - ; FP16: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; FP16: $q0 = COPY [[FMA]](<4 x s32>) - ; FP16: RET_ReallyLR implicit $q0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; FP16-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; FP16-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; FP16-NEXT: $q0 = COPY [[FMA]](<4 x s32>) + ; FP16-NEXT: RET_ReallyLR implicit $q0 %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 %2:_(<4 x s32>) = COPY $q2 @@ -211,20 +174,23 @@ body: | ; NO-FP16-LABEL: name: test_v2f64.fma ; NO-FP16: liveins: $q0, $q1, $q2 - ; NO-FP16: 
[[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; NO-FP16: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 - ; NO-FP16: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 - ; NO-FP16: [[FMA:%[0-9]+]]:_(<2 x s64>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; NO-FP16: $q0 = COPY [[FMA]](<2 x s64>) - ; NO-FP16: RET_ReallyLR implicit $q0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; NO-FP16-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 + ; NO-FP16-NEXT: [[FMA:%[0-9]+]]:_(<2 x s64>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; NO-FP16-NEXT: $q0 = COPY [[FMA]](<2 x s64>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 + ; ; FP16-LABEL: name: test_v2f64.fma ; FP16: liveins: $q0, $q1, $q2 - ; FP16: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; FP16: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 - ; FP16: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 - ; FP16: [[FMA:%[0-9]+]]:_(<2 x s64>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; FP16: $q0 = COPY [[FMA]](<2 x s64>) - ; FP16: RET_ReallyLR implicit $q0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; FP16-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 + ; FP16-NEXT: [[FMA:%[0-9]+]]:_(<2 x s64>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; FP16-NEXT: $q0 = COPY [[FMA]](<2 x s64>) + ; FP16-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x s64>) = COPY $q0 %1:_(<2 x s64>) = COPY $q1 %2:_(<2 x s64>) = COPY $q2 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index bb915153c53a1..70114f83e8dd6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -154,7 +154,6 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_INTRINSIC_LRINT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_INTRINSIC_ROUNDEVEN (opcode {{[0-9]+}}): 1 type index, 0 imm indices @@ -442,6 +441,7 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FMA (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FMAD (opcode {{[0-9]+}}): 1 type index, 0 imm indices diff --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll deleted file mode 100644 index 0278128b25b62..0000000000000 --- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll +++ /dev/null @@ -1,514 +0,0 @@ -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP16 -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16 - -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 \ -; RUN: -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \ -; RUN: 2>&1 | FileCheck %s --check-prefixes=GISEL,GISEL-NOFP16,FALLBACK -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \ -; RUN: -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \ -; RUN: 2>&1 | 
FileCheck %s --check-prefixes=GISEL,GISEL-FP16,FALLBACK - -;;; Half vectors - -%v4f16 = type <4 x half> - -define %v4f16 @test_v4f16.powi(%v4f16 %a, i32 %b) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v4f16.powi: - ; CHECK-COUNT-4: bl __powi - %1 = call %v4f16 @llvm.powi.v4f16.i32(%v4f16 %a, i32 %b) - ret %v4f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f16.sin -define %v4f16 @test_v4f16.sin(%v4f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v4f16.sin: - ; CHECK-COUNT-4: bl sinf - ; GISEL-LABEL: test_v4f16.sin: - ; GISEL-COUNT-4: bl sinf - %1 = call %v4f16 @llvm.sin.v4f16(%v4f16 %a) - ret %v4f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f16.cos -define %v4f16 @test_v4f16.cos(%v4f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v4f16.cos: - ; CHECK-COUNT-4: bl cosf - ; GISEL-LABEL: test_v4f16.cos: - ; GISEL-COUNT-4: bl cosf - %1 = call %v4f16 @llvm.cos.v4f16(%v4f16 %a) - ret %v4f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f16.exp -define %v4f16 @test_v4f16.exp(%v4f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v4f16.exp: - ; CHECK-COUNT-4: bl exp - ; GISEL-LABEL: test_v4f16.exp: - ; GISEL-COUNT-4: bl exp - %1 = call %v4f16 @llvm.exp.v4f16(%v4f16 %a) - ret %v4f16 %1 -} -define %v4f16 @test_v4f16.exp2(%v4f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v4f16.exp2: - ; CHECK-COUNT-4: bl exp2 - %1 = call %v4f16 @llvm.exp2.v4f16(%v4f16 %a) - ret %v4f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f16.log -define %v4f16 @test_v4f16.log(%v4f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. 
- ; CHECK-LABEL: test_v4f16.log: - ; CHECK-COUNT-4: bl log - ; GISEL-LABEL: test_v4f16.log: - ; GISEL-COUNT-4: bl log - %1 = call %v4f16 @llvm.log.v4f16(%v4f16 %a) - ret %v4f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f16.log10 -define %v4f16 @test_v4f16.log10(%v4f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v4f16.log10: - ; CHECK-COUNT-4: bl log10 - ; GISEL-LABEL: test_v4f16.log10: - ; GISEL-COUNT-4: bl log10 - %1 = call %v4f16 @llvm.log10.v4f16(%v4f16 %a) - ret %v4f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f16.log2 -define %v4f16 @test_v4f16.log2(%v4f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v4f16.log2: - ; CHECK-COUNT-4: bl log2 - ; GISEL-LABEL: test_v4f16.log2: - ; GISEL-COUNT-4: bl log2 - %1 = call %v4f16 @llvm.log2.v4f16(%v4f16 %a) - ret %v4f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f16.fma -define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) { - ; CHECK-LABEL: test_v4f16.fma: - ; CHECK-NOFP16-COUNT-4: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} - ; CHECK-FP16-NOT: fcvt - ; CHECK-FP16: fmla.4h - ; GISEL-LABEL: test_v4f16.fma: - ; GISEL-NOFP16-COUNT-4: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} - ; GISEL-FP16-NOT: fcvt - ; GISEL-FP16: fmla.4h - %1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c) - ret %v4f16 %1 -} - -declare %v4f16 @llvm.powi.v4f16.i32(%v4f16, i32) #0 -declare %v4f16 @llvm.sin.v4f16(%v4f16) #0 -declare %v4f16 @llvm.cos.v4f16(%v4f16) #0 -declare %v4f16 @llvm.exp.v4f16(%v4f16) #0 -declare %v4f16 @llvm.exp2.v4f16(%v4f16) #0 -declare %v4f16 @llvm.log.v4f16(%v4f16) #0 -declare %v4f16 @llvm.log10.v4f16(%v4f16) #0 -declare %v4f16 @llvm.log2.v4f16(%v4f16) #0 -declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0 - -;;; - -%v8f16 = type <8 x half> - -define %v8f16 @test_v8f16.powi(%v8f16 %a, i32 %b) { - ; This operation is expanded, whether with or without +fullfp16. 
- ; CHECK-LABEL: test_v8f16.powi: - ; CHECK-COUNT-8: bl __powi - ; GISEL-LABEL: test_v8f16.powi: - ; GISEL-COUNT-8: bl __powi - %1 = call %v8f16 @llvm.powi.v8f16.i32(%v8f16 %a, i32 %b) - ret %v8f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v8f16.sin -define %v8f16 @test_v8f16.sin(%v8f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v8f16.sin: - ; CHECK-COUNT-8: bl sinf - ; GISEL-LABEL: test_v8f16.sin: - ; GISEL-COUNT-8: bl sinf - %1 = call %v8f16 @llvm.sin.v8f16(%v8f16 %a) - ret %v8f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v8f16.cos -define %v8f16 @test_v8f16.cos(%v8f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v8f16.cos: - ; CHECK-COUNT-8: bl cosf - ; GISEL-LABEL: test_v8f16.cos: - ; GISEL-COUNT-8: bl cosf - %1 = call %v8f16 @llvm.cos.v8f16(%v8f16 %a) - ret %v8f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v8f16.exp -define %v8f16 @test_v8f16.exp(%v8f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v8f16.exp: - ; CHECK-COUNT-8: bl exp - ; GISEL-LABEL: test_v8f16.exp: - ; GISEL-COUNT-8: bl exp - %1 = call %v8f16 @llvm.exp.v8f16(%v8f16 %a) - ret %v8f16 %1 -} -define %v8f16 @test_v8f16.exp2(%v8f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v8f16.exp2: - ; CHECK-COUNT-8: bl exp2 - %1 = call %v8f16 @llvm.exp2.v8f16(%v8f16 %a) - ret %v8f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v8f16.log -define %v8f16 @test_v8f16.log(%v8f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v8f16.log: - ; CHECK-COUNT-8: bl log - ; GISEL-LABEL: test_v8f16.log: - ; GISEL-COUNT-8: bl log - %1 = call %v8f16 @llvm.log.v8f16(%v8f16 %a) - ret %v8f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v8f16.log10 -define %v8f16 @test_v8f16.log10(%v8f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. 
- ; CHECK-LABEL: test_v8f16.log10: - ; CHECK-COUNT-8: bl log10 - ; GISEL-LABEL: test_v8f16.log10: - ; GISEL-COUNT-8: bl log10 - %1 = call %v8f16 @llvm.log10.v8f16(%v8f16 %a) - ret %v8f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v8f16.log2 -define %v8f16 @test_v8f16.log2(%v8f16 %a) { - ; This operation is expanded, whether with or without +fullfp16. - ; CHECK-LABEL: test_v8f16.log2: - ; CHECK-COUNT-8: bl log2 - ; GISEL-LABEL: test_v8f16.log2: - ; GISEL-COUNT-8: bl log2 - %1 = call %v8f16 @llvm.log2.v8f16(%v8f16 %a) - ret %v8f16 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v8f16.fma -define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) { - ; CHECK-LABEL: test_v8f16.fma: - ; CHECK-NOFP16-COUNT-8: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} - ; CHECK-FP16-NOT: fcvt - ; CHECK-FP16: fmla.8h - ; GISEL-LABEL: test_v8f16.fma: - ; GISEL-NOFP16-COUNT-8: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} - ; GISEL-FP16-NOT: fcvt - ; GISEL-FP16: fmla.8h - %1 = call %v8f16 @llvm.fma.v8f16(%v8f16 %a, %v8f16 %b, %v8f16 %c) - ret %v8f16 %1 -} - -declare %v8f16 @llvm.powi.v8f16.i32(%v8f16, i32) #0 -declare %v8f16 @llvm.sin.v8f16(%v8f16) #0 -declare %v8f16 @llvm.cos.v8f16(%v8f16) #0 -declare %v8f16 @llvm.exp.v8f16(%v8f16) #0 -declare %v8f16 @llvm.exp2.v8f16(%v8f16) #0 -declare %v8f16 @llvm.log.v8f16(%v8f16) #0 -declare %v8f16 @llvm.log10.v8f16(%v8f16) #0 -declare %v8f16 @llvm.log2.v8f16(%v8f16) #0 -declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0 - -;;; Float vectors - -%v2f32 = type <2 x float> - -; CHECK: test_v2f32.powi: -define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) { - ; CHECK: pow - %1 = call %v2f32 @llvm.powi.v2f32.i32(%v2f32 %a, i32 %b) - ret %v2f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f32.sin -; CHECK: test_v2f32.sin: -define %v2f32 @test_v2f32.sin(%v2f32 %a) { - ; CHECK: sin - ; GISEL: sin - %1 = call %v2f32 @llvm.sin.v2f32(%v2f32 %a) - ret %v2f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f32.cos -; CHECK: test_v2f32.cos: 
-define %v2f32 @test_v2f32.cos(%v2f32 %a) { - ; CHECK: cos - ; GISEL: cos - %1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a) - ret %v2f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f32.exp -; CHECK: test_v2f32.exp: -; GISEL: test_v2f32.exp: -define %v2f32 @test_v2f32.exp(%v2f32 %a) { - ; CHECK: exp - ; GISEL: exp - %1 = call %v2f32 @llvm.exp.v2f32(%v2f32 %a) - ret %v2f32 %1 -} -; CHECK: test_v2f32.exp2: -define %v2f32 @test_v2f32.exp2(%v2f32 %a) { - ; CHECK: exp - %1 = call %v2f32 @llvm.exp2.v2f32(%v2f32 %a) - ret %v2f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f32.log -; CHECK: test_v2f32.log: -define %v2f32 @test_v2f32.log(%v2f32 %a) { - ; CHECK: log - ; GISEL: log - %1 = call %v2f32 @llvm.log.v2f32(%v2f32 %a) - ret %v2f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f32.log10 -; CHECK: test_v2f32.log10: -; GISEL: test_v2f32.log10: -define %v2f32 @test_v2f32.log10(%v2f32 %a) { - ; CHECK: log - ; GISEL: log - %1 = call %v2f32 @llvm.log10.v2f32(%v2f32 %a) - ret %v2f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f32.log2 -; CHECK: test_v2f32.log2: -; GISEL: test_v2f32.log2: -define %v2f32 @test_v2f32.log2(%v2f32 %a) { - ; CHECK: log - ; GISEL: log - %1 = call %v2f32 @llvm.log2.v2f32(%v2f32 %a) - ret %v2f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f32.fma -; CHECK-LABEL: test_v2f32.fma: -; GISEL-LABEL: test_v2f32.fma: -define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) { - ; CHECK: fmla.2s - ; GISEL: fmla.2s - %1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c) - ret %v2f32 %1 -} - -declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0 -declare %v2f32 @llvm.sin.v2f32(%v2f32) #0 -declare %v2f32 @llvm.cos.v2f32(%v2f32) #0 -declare %v2f32 @llvm.exp.v2f32(%v2f32) #0 -declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0 -declare %v2f32 @llvm.log.v2f32(%v2f32) #0 -declare %v2f32 @llvm.log10.v2f32(%v2f32) #0 -declare %v2f32 @llvm.log2.v2f32(%v2f32) #0 -declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0 - -;;; - -%v4f32 = type <4 x float> - -; CHECK: 
test_v4f32.powi: -define %v4f32 @test_v4f32.powi(%v4f32 %a, i32 %b) { - ; CHECK: pow - %1 = call %v4f32 @llvm.powi.v4f32.i32(%v4f32 %a, i32 %b) - ret %v4f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f32.sin -; CHECK: test_v4f32.sin: -define %v4f32 @test_v4f32.sin(%v4f32 %a) { - ; CHECK: sin - ; GISEL: sin - %1 = call %v4f32 @llvm.sin.v4f32(%v4f32 %a) - ret %v4f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f32.cos -; CHECK: test_v4f32.cos: -define %v4f32 @test_v4f32.cos(%v4f32 %a) { - ; CHECK: cos - ; GISEL: cos - %1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a) - ret %v4f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f32.exp -; CHECK: test_v4f32.exp: -; GISEL: test_v4f32.exp: -define %v4f32 @test_v4f32.exp(%v4f32 %a) { - ; CHECK: exp - ; GISEL: exp - %1 = call %v4f32 @llvm.exp.v4f32(%v4f32 %a) - ret %v4f32 %1 -} -; CHECK: test_v4f32.exp2: -define %v4f32 @test_v4f32.exp2(%v4f32 %a) { - ; CHECK: exp - %1 = call %v4f32 @llvm.exp2.v4f32(%v4f32 %a) - ret %v4f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f32.log -; CHECK: test_v4f32.log: -define %v4f32 @test_v4f32.log(%v4f32 %a) { - ; CHECK: log - ; GISEL: log - %1 = call %v4f32 @llvm.log.v4f32(%v4f32 %a) - ret %v4f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f32.log10 -; CHECK: test_v4f32.log10: -define %v4f32 @test_v4f32.log10(%v4f32 %a) { - ; CHECK: log - ; GISEL: log - %1 = call %v4f32 @llvm.log10.v4f32(%v4f32 %a) - ret %v4f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f32.log2 -; CHECK: test_v4f32.log2: -; GISEL: test_v4f32.log2: -define %v4f32 @test_v4f32.log2(%v4f32 %a) { - ; CHECK: log - ; GISEL: log - %1 = call %v4f32 @llvm.log2.v4f32(%v4f32 %a) - ret %v4f32 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v4f32.fma -; CHECK: test_v4f32.fma: -; GISEL: test_v4f32.fma: -define %v4f32 @test_v4f32.fma(%v4f32 %a, %v4f32 %b, %v4f32 %c) { - ; CHECK: fma - ; GISEL: fma - %1 = call %v4f32 @llvm.fma.v4f32(%v4f32 %a, %v4f32 %b, %v4f32 %c) - ret %v4f32 %1 -} - -declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0 -declare %v4f32 
@llvm.sin.v4f32(%v4f32) #0 -declare %v4f32 @llvm.cos.v4f32(%v4f32) #0 -declare %v4f32 @llvm.exp.v4f32(%v4f32) #0 -declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0 -declare %v4f32 @llvm.log.v4f32(%v4f32) #0 -declare %v4f32 @llvm.log10.v4f32(%v4f32) #0 -declare %v4f32 @llvm.log2.v4f32(%v4f32) #0 -declare %v4f32 @llvm.fma.v4f32(%v4f32, %v4f32, %v4f32) #0 - -;;; Double vector - -%v2f64 = type <2 x double> - -; CHECK: test_v2f64.powi: -define %v2f64 @test_v2f64.powi(%v2f64 %a, i32 %b) { - ; CHECK: pow - %1 = call %v2f64 @llvm.powi.v2f64.i32(%v2f64 %a, i32 %b) - ret %v2f64 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f64.sin -; CHECK: test_v2f64.sin: -define %v2f64 @test_v2f64.sin(%v2f64 %a) { - ; CHECK: sin - ; GISEL: sin - %1 = call %v2f64 @llvm.sin.v2f64(%v2f64 %a) - ret %v2f64 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f64.cos -; CHECK: test_v2f64.cos: -define %v2f64 @test_v2f64.cos(%v2f64 %a) { - ; CHECK: cos - ; GISEL: cos - %1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a) - ret %v2f64 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f64.exp -; CHECK: test_v2f64.exp: -; GISEL: test_v2f64.exp: -define %v2f64 @test_v2f64.exp(%v2f64 %a) { - ; CHECK: exp - ; GISEL: exp - %1 = call %v2f64 @llvm.exp.v2f64(%v2f64 %a) - ret %v2f64 %1 -} -; CHECK: test_v2f64.exp2: -define %v2f64 @test_v2f64.exp2(%v2f64 %a) { - ; CHECK: exp - %1 = call %v2f64 @llvm.exp2.v2f64(%v2f64 %a) - ret %v2f64 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f64.log -; CHECK: test_v2f64.log: -define %v2f64 @test_v2f64.log(%v2f64 %a) { - ; CHECK: log - ; GISEL: log - %1 = call %v2f64 @llvm.log.v2f64(%v2f64 %a) - ret %v2f64 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f64.log10 -; CHECK: test_v2f64.log10: -; GISEL: test_v2f64.log10: -define %v2f64 @test_v2f64.log10(%v2f64 %a) { - ; CHECK: log - ; GISEL: log - %1 = call %v2f64 @llvm.log10.v2f64(%v2f64 %a) - ret %v2f64 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f64.log2 -; CHECK: test_v2f64.log2: -; GISEL: test_v2f64.log2: -define %v2f64 @test_v2f64.log2(%v2f64 %a) { - ; 
CHECK: log - ; GISEL: log - %1 = call %v2f64 @llvm.log2.v2f64(%v2f64 %a) - ret %v2f64 %1 -} - -; FALLBACK-NOT: remark{{.*}}test_v2f64.fma -; CHECK: test_v2f64.fma: -; GISEL: test_v2f64.fma: -define %v2f64 @test_v2f64.fma(%v2f64 %a, %v2f64 %b, %v2f64 %c) { - ; CHECK: fma - ; GISEL: fma - %1 = call %v2f64 @llvm.fma.v2f64(%v2f64 %a, %v2f64 %b, %v2f64 %c) - ret %v2f64 %1 -} - -declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0 -declare %v2f64 @llvm.sin.v2f64(%v2f64) #0 -declare %v2f64 @llvm.cos.v2f64(%v2f64) #0 -declare %v2f64 @llvm.exp.v2f64(%v2f64) #0 -declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0 -declare %v2f64 @llvm.log.v2f64(%v2f64) #0 -declare %v2f64 @llvm.log10.v2f64(%v2f64) #0 -declare %v2f64 @llvm.log2.v2f64(%v2f64) #0 -declare %v2f64 @llvm.fma.v2f64(%v2f64, %v2f64, %v2f64) #0 - -attributes #0 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AArch64/fmla.ll b/llvm/test/CodeGen/AArch64/fmla.ll index a1782f8e9087c..3ae2158a18868 100644 --- a/llvm/test/CodeGen/AArch64/fmla.ll +++ b/llvm/test/CodeGen/AArch64/fmla.ll @@ -1,21 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16 ; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 -; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 -; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 - -; CHECK-GI: warning: Instruction selection used fallback path for fma_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection 
used fallback path for fma_v3f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v8f32 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fma_v7f16 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fma_v16f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmuladd_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmuladd_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmuladd_v3f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmuladd_v8f32 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fmuladd_v7f16 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fmuladd_v16f16 +; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 define double @fma_f64(double %a, double %b, double %c) { ; CHECK-LABEL: fma_f64: @@ -82,27 +69,45 @@ entry: } define <3 x double> @fma_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) { -; CHECK-LABEL: fma_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d6 killed $d6 def $q6 -; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d7 killed $d7 def $q7 -; CHECK-NEXT: // kill: def $d4 killed $d4 def $q4 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d5 killed $d5 def $q5 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v3.d[1], v4.d[0] -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: mov v6.d[1], v7.d[0] -; CHECK-NEXT: fmla v6.2d, v3.2d, v0.2d -; CHECK-NEXT: ldr d3, [sp] -; CHECK-NEXT: fmla v3.2d, v5.2d, v2.2d 
-; CHECK-NEXT: fmov d0, d6 -; CHECK-NEXT: ext v1.16b, v6.16b, v6.16b, #8 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: fmov d2, d3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fma_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6 +; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7 +; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v3.d[1], v4.d[0] +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: mov v6.d[1], v7.d[0] +; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d +; CHECK-SD-NEXT: ldr d3, [sp] +; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d +; CHECK-SD-NEXT: fmov d0, d6 +; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: fmov d2, d3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fma_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4 +; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-GI-NEXT: mov v6.d[1], v7.d[0] +; CHECK-GI-NEXT: fmla v6.2d, v3.2d, v0.2d +; CHECK-GI-NEXT: ldr d0, [sp] +; CHECK-GI-NEXT: fmadd d2, d2, d5, d0 +; CHECK-GI-NEXT: mov d1, v6.d[1] +; CHECK-GI-NEXT: fmov d0, d6 +; CHECK-GI-NEXT: ret entry: %d = call <3 x double> @llvm.fma.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) ret <3 x double> %d @@ -249,67 +254,46 @@ define <7 x half> @fma_v7f16(<7 x half> %a, <7 x half> %b, <7 x 
half> %c) { ; ; CHECK-GI-NOFP16-LABEL: fma_v7f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[1] -; CHECK-GI-NOFP16-NEXT: mov h5, v2.h[1] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s17, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s18, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s19, h2 -; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[4] -; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[4] -; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h16 -; CHECK-GI-NOFP16-NEXT: fmadd s17, s17, s18, s19 -; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[3] -; CHECK-GI-NOFP16-NEXT: mov h19, v2.h[3] -; CHECK-GI-NOFP16-NEXT: fmadd s4, s3, s4, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fmadd s6, s6, s7, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s17 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h19 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h20 -; CHECK-GI-NOFP16-NEXT: fcvt s18, h21 -; CHECK-GI-NOFP16-NEXT: fcvt s19, h22 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[5] -; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[4] +; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[5] +; CHECK-GI-NOFP16-NEXT: mov h5, v2.h[4] +; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[5] +; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v18.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v19.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6] 
; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[6] -; CHECK-GI-NOFP16-NEXT: fmadd s5, s5, s7, s16 -; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fmadd s4, s17, s18, s19 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h20 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h21 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h22 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: fmadd s6, s7, s16, s17 -; CHECK-GI-NOFP16-NEXT: fmadd s0, s0, s1, s2 -; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h5, s6 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v6.h[0] +; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v7.h[0] +; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v16.h[0] +; CHECK-GI-NOFP16-NEXT: fmla v19.4s, v18.4s, v17.4s +; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v19.4s +; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v4.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov v5.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v4.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NOFP16-NEXT: fmla v4.4s, v3.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v5.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v4.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v6.h[0] +; CHECK-GI-NOFP16-NEXT: mov 
h2, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fma_v7f16: @@ -371,42 +355,11 @@ define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { ; ; CHECK-GI-NOFP16-LABEL: fma_v4f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NOFP16-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[1] -; CHECK-GI-NOFP16-NEXT: mov h5, v2.h[1] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s17, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s18, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s19, h2 -; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[3] -; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h16 -; CHECK-GI-NOFP16-NEXT: fmadd s17, s17, s18, s19 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fmadd s3, s3, s4, s5 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fmadd s5, s6, s7, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: fmadd s1, s4, s1, s2 -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 
killed $d0 killed $q0 +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fmla v2.4s, v1.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fma_v4f16: @@ -501,75 +454,16 @@ define <8 x half> @fma_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; ; CHECK-GI-NOFP16-LABEL: fma_v8f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[1] -; CHECK-GI-NOFP16-NEXT: mov h5, v2.h[1] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s17, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s18, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s19, h2 -; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[3] -; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h16 -; CHECK-GI-NOFP16-NEXT: fmadd s17, s17, s18, s19 -; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s19, h22 -; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[5] -; CHECK-GI-NOFP16-NEXT: fmadd s4, s3, s4, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fmadd s6, s6, s7, s16 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h20 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h21 -; CHECK-GI-NOFP16-NEXT: mov h20, v2.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s17 -; CHECK-GI-NOFP16-NEXT: mov h17, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fmadd s7, s7, s16, s19 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s18, h20 -; 
CHECK-GI-NOFP16-NEXT: fcvt s19, h22 -; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[6] -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[7] -; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h17 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h21 -; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt h7, s7 -; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] -; CHECK-GI-NOFP16-NEXT: fmadd s5, s5, s16, s18 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fmadd s4, s4, s17, s19 -; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s6, h20 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h21 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h22 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v7.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: fmadd s6, s6, s16, s17 -; CHECK-GI-NOFP16-NEXT: fmadd s0, s0, s1, s2 -; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h5, s6 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h +; CHECK-GI-NOFP16-NEXT: fmla v5.4s, v4.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fmla v2.4s, v1.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v5.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fma_v8f16: @@ -735,148 +629,26 @@ define <16 x half> @fma_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) { ; ; CHECK-GI-NOFP16-LABEL: fma_v16f16: ; 
CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NOFP16-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NOFP16-NEXT: .cfi_offset b8, -16 -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[1] -; CHECK-GI-NOFP16-NEXT: fcvt s16, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h2 -; CHECK-GI-NOFP16-NEXT: fcvt s18, h4 -; CHECK-GI-NOFP16-NEXT: mov h19, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h20, v2.h[1] -; CHECK-GI-NOFP16-NEXT: mov h21, v4.h[1] -; CHECK-GI-NOFP16-NEXT: mov h22, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h23, v2.h[2] -; CHECK-GI-NOFP16-NEXT: mov h24, v4.h[2] -; CHECK-GI-NOFP16-NEXT: mov h26, v2.h[3] -; CHECK-GI-NOFP16-NEXT: mov h27, v4.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: mov h25, v0.h[4] -; CHECK-GI-NOFP16-NEXT: mov h28, v1.h[1] -; CHECK-GI-NOFP16-NEXT: fmadd s16, s16, s17, s18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h20 -; CHECK-GI-NOFP16-NEXT: fcvt s19, h19 -; CHECK-GI-NOFP16-NEXT: fcvt s18, h21 -; CHECK-GI-NOFP16-NEXT: fcvt s20, h23 -; CHECK-GI-NOFP16-NEXT: fcvt s22, h22 -; CHECK-GI-NOFP16-NEXT: fcvt s21, h24 -; CHECK-GI-NOFP16-NEXT: fcvt s23, h26 -; CHECK-GI-NOFP16-NEXT: fcvt s24, h27 -; CHECK-GI-NOFP16-NEXT: fcvt s26, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s27, h3 -; CHECK-GI-NOFP16-NEXT: fcvt s29, h5 -; CHECK-GI-NOFP16-NEXT: mov h31, v2.h[4] -; CHECK-GI-NOFP16-NEXT: mov h8, v3.h[1] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: fmadd s17, s6, s17, s18 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s16 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h28 -; CHECK-GI-NOFP16-NEXT: fmadd s19, s19, s20, s21 -; CHECK-GI-NOFP16-NEXT: fmadd s18, s22, s23, s24 -; CHECK-GI-NOFP16-NEXT: mov h20, v5.h[1] -; CHECK-GI-NOFP16-NEXT: fmadd s24, s26, s27, s29 -; CHECK-GI-NOFP16-NEXT: mov h22, v4.h[4] -; CHECK-GI-NOFP16-NEXT: mov h21, v3.h[2] -; CHECK-GI-NOFP16-NEXT: mov h26, v5.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s25, h25 -; CHECK-GI-NOFP16-NEXT: fcvt s28, h31 -; CHECK-GI-NOFP16-NEXT: fcvt h29, s17 -; CHECK-GI-NOFP16-NEXT: 
fcvt s17, h8 -; CHECK-GI-NOFP16-NEXT: mov h30, v1.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s20, h20 -; CHECK-GI-NOFP16-NEXT: mov h23, v3.h[3] -; CHECK-GI-NOFP16-NEXT: mov h27, v5.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s22, h22 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: fcvt s21, h21 -; CHECK-GI-NOFP16-NEXT: fcvt s26, h26 -; CHECK-GI-NOFP16-NEXT: mov h31, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h8, v1.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s30, h30 -; CHECK-GI-NOFP16-NEXT: fcvt h19, s19 -; CHECK-GI-NOFP16-NEXT: mov v6.h[1], v29.h[0] -; CHECK-GI-NOFP16-NEXT: fmadd s20, s16, s17, s20 -; CHECK-GI-NOFP16-NEXT: fcvt s23, h23 -; CHECK-GI-NOFP16-NEXT: fcvt s27, h27 -; CHECK-GI-NOFP16-NEXT: fmadd s16, s25, s28, s22 -; CHECK-GI-NOFP16-NEXT: mov h22, v2.h[5] -; CHECK-GI-NOFP16-NEXT: mov h25, v4.h[5] -; CHECK-GI-NOFP16-NEXT: fmadd s21, s7, s21, s26 -; CHECK-GI-NOFP16-NEXT: mov h26, v3.h[4] -; CHECK-GI-NOFP16-NEXT: mov h28, v5.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt h7, s24 -; CHECK-GI-NOFP16-NEXT: fcvt s24, h31 -; CHECK-GI-NOFP16-NEXT: mov h29, v1.h[5] -; CHECK-GI-NOFP16-NEXT: fmadd s17, s30, s23, s27 -; CHECK-GI-NOFP16-NEXT: fcvt h20, s20 -; CHECK-GI-NOFP16-NEXT: fcvt s27, h8 -; CHECK-GI-NOFP16-NEXT: fcvt s22, h22 -; CHECK-GI-NOFP16-NEXT: fcvt s25, h25 -; CHECK-GI-NOFP16-NEXT: fcvt h18, s18 -; CHECK-GI-NOFP16-NEXT: fcvt s26, h26 -; CHECK-GI-NOFP16-NEXT: fcvt s28, h28 -; CHECK-GI-NOFP16-NEXT: mov v6.h[2], v19.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h21, s21 -; CHECK-GI-NOFP16-NEXT: mov h23, v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[6] -; CHECK-GI-NOFP16-NEXT: mov v7.h[1], v20.h[0] -; CHECK-GI-NOFP16-NEXT: mov h20, v3.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h17, s17 -; CHECK-GI-NOFP16-NEXT: fmadd s22, s24, s22, s25 -; CHECK-GI-NOFP16-NEXT: mov h24, v5.h[5] -; CHECK-GI-NOFP16-NEXT: mov h25, v2.h[6] -; CHECK-GI-NOFP16-NEXT: fmadd s26, s27, s26, s28 -; CHECK-GI-NOFP16-NEXT: mov h27, v4.h[6] -; CHECK-GI-NOFP16-NEXT: mov h28, v3.h[6] -; CHECK-GI-NOFP16-NEXT: mov v6.h[3], v18.h[0] -; 
CHECK-GI-NOFP16-NEXT: mov h18, v5.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt h16, s16 -; CHECK-GI-NOFP16-NEXT: mov v7.h[2], v21.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s21, h29 -; CHECK-GI-NOFP16-NEXT: fcvt s20, h20 -; CHECK-GI-NOFP16-NEXT: fcvt s24, h24 -; CHECK-GI-NOFP16-NEXT: fcvt s23, h23 -; CHECK-GI-NOFP16-NEXT: fcvt s25, h25 -; CHECK-GI-NOFP16-NEXT: fcvt s27, h27 -; CHECK-GI-NOFP16-NEXT: fcvt s19, h19 -; CHECK-GI-NOFP16-NEXT: fcvt s28, h28 -; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[7] -; CHECK-GI-NOFP16-NEXT: mov v7.h[3], v17.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h17, s26 -; CHECK-GI-NOFP16-NEXT: mov h4, v4.h[7] -; CHECK-GI-NOFP16-NEXT: fmadd s20, s21, s20, s24 -; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] -; CHECK-GI-NOFP16-NEXT: mov h3, v3.h[7] -; CHECK-GI-NOFP16-NEXT: fmadd s21, s23, s25, s27 -; CHECK-GI-NOFP16-NEXT: mov h5, v5.h[7] -; CHECK-GI-NOFP16-NEXT: mov v6.h[4], v16.h[0] -; CHECK-GI-NOFP16-NEXT: fmadd s18, s19, s28, s18 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s22 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v7.h[4], v17.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt h17, s20 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov v6.h[5], v16.h[0] -; CHECK-GI-NOFP16-NEXT: fmadd s0, s0, s2, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s21 -; CHECK-GI-NOFP16-NEXT: mov v7.h[5], v17.h[0] -; CHECK-GI-NOFP16-NEXT: fmadd s1, s1, s3, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s18 -; CHECK-GI-NOFP16-NEXT: mov v6.h[6], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v7.h[6], v3.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: mov v6.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v7.h[7], v1.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v6.16b -; CHECK-GI-NOFP16-NEXT: mov v1.16b, v7.16b -; CHECK-GI-NOFP16-NEXT: ldr d8, [sp], 
#16 // 8-byte Folded Reload +; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v18.4s, v4.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v19.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h +; CHECK-GI-NOFP16-NEXT: fmla v18.4s, v16.4s, v6.4s +; CHECK-GI-NOFP16-NEXT: fmla v19.4s, v17.4s, v7.4s +; CHECK-GI-NOFP16-NEXT: fmla v4.4s, v2.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fmla v5.4s, v3.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v18.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v19.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fma_v16f16: @@ -962,27 +734,45 @@ entry: } define <3 x double> @fmuladd_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) { -; CHECK-LABEL: fmuladd_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d6 killed $d6 def $q6 -; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d7 killed $d7 def $q7 -; CHECK-NEXT: // kill: def $d4 killed $d4 def $q4 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d5 killed $d5 def $q5 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v3.d[1], v4.d[0] -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: mov v6.d[1], v7.d[0] -; CHECK-NEXT: fmla v6.2d, v3.2d, v0.2d -; CHECK-NEXT: ldr d3, [sp] -; CHECK-NEXT: fmla v3.2d, v5.2d, v2.2d -; CHECK-NEXT: fmov d0, d6 -; CHECK-NEXT: ext v1.16b, v6.16b, v6.16b, #8 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: fmov d2, d3 -; CHECK-NEXT: ret +; 
CHECK-SD-LABEL: fmuladd_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6 +; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7 +; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v3.d[1], v4.d[0] +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: mov v6.d[1], v7.d[0] +; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d +; CHECK-SD-NEXT: ldr d3, [sp] +; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d +; CHECK-SD-NEXT: fmov d0, d6 +; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: fmov d2, d3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmuladd_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4 +; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-GI-NEXT: mov v6.d[1], v7.d[0] +; CHECK-GI-NEXT: fmla v6.2d, v3.2d, v0.2d +; CHECK-GI-NEXT: ldr d0, [sp] +; CHECK-GI-NEXT: fmadd d2, d2, d5, d0 +; CHECK-GI-NEXT: mov d1, v6.d[1] +; CHECK-GI-NEXT: fmov d0, d6 +; CHECK-GI-NEXT: ret entry: %d = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) ret <3 x double> %d From 343bed8d3a9b632594a3f786bbb189613975d51e Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Sat, 14 Oct 2023 10:09:23 -0400 Subject: [PATCH 135/720] Canonically identical types are allowed in compound expressions in C We did not have a catch-all 
for when the two operand types are identical after canonicalization. Instead, we handled that on a case by case basis. Thus, we would diagnose code like: ``` mat4 test(int a) { typedef float mat4 __attribute((matrix_type(4, 4))); mat4 transform; return (a > 0) ? transform : transform; } ``` This simplifies the logic and will be more forwards compatible with other extended datatypes. Fixes https://github.com/llvm/llvm-project/issues/69008 --- clang/docs/ReleaseNotes.rst | 5 ++++- clang/lib/Sema/SemaExpr.cpp | 17 ++++++----------- clang/test/Sema/conditional.c | 14 +++++++++++++- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ade3c33b3b944..be7c8bf247f7a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -386,10 +386,13 @@ Bug Fixes in This Version cannot be used with ``Release`` mode builds. (`#68237 `_). - Fix crash in evaluating ``constexpr`` value for invalid template function. Fixes (`#68542 `_) - - Fixed an issue when a shift count larger than ``__INT64_MAX__``, in a right shift operation, could result in missing warnings about ``shift count >= width of type`` or internal compiler error. +- Fixed an issue with computing the common type for the LHS and RHS of a `?:` + operator in C. No longer issuing a confusing diagnostic along the lines of + "incompatible operand types ('foo' and 'foo')" with extensions such as matrix + types. Fixes (`#69008 `_) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index d78f923b2cb2c..aa30a3a038875 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9186,7 +9186,7 @@ QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, if (checkCondition(*this, Cond.get(), QuestionLoc)) return QualType(); - // Now check the two expressions. + // Handle vectors. 
if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/ false, @@ -9244,11 +9244,6 @@ QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, return ResTy; } - // And if they're both bfloat (which isn't arithmetic), that's fine too. - if (LHSTy->isBFloat16Type() && RHSTy->isBFloat16Type()) { - return Context.getCommonSugaredType(LHSTy, RHSTy); - } - // If both operands are the same structure or union type, the result is that // type. if (const RecordType *LHSRT = LHSTy->getAs()) { // C99 6.5.15p3 @@ -9320,17 +9315,17 @@ QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, /*IsIntFirstExpr=*/false)) return LHSTy; - // Allow ?: operations in which both operands have the same - // built-in sizeless type. - if (LHSTy->isSizelessBuiltinType() && Context.hasSameType(LHSTy, RHSTy)) - return Context.getCommonSugaredType(LHSTy, RHSTy); - // Emit a better diagnostic if one of the expressions is a null pointer // constant and the other is not a pointer type. In this case, the user most // likely forgot to take the address of the other expression. if (DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc)) return QualType(); + // Finally, if the LHS and RHS types are canonically the same type, we can + // use the common sugared type. + if (Context.hasSameType(LHSTy, RHSTy)) + return Context.getCommonSugaredType(LHSTy, RHSTy); + // Otherwise, the operands are not compatible. Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() diff --git a/clang/test/Sema/conditional.c b/clang/test/Sema/conditional.c index 666ac5416322d..cebdb7b4043a3 100644 --- a/clang/test/Sema/conditional.c +++ b/clang/test/Sema/conditional.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -fsyntax-only -verify +// RUN: %clang_cc1 %s -fsyntax-only -fenable-matrix -verify const char* test1 = 1 ? "i" : 1 == 1 ? 
"v" : "r"; @@ -19,3 +19,15 @@ void pr39809(void) { _Generic(0 ? (int volatile*)0 : (void const*)1, void volatile const*: (void)0); _Generic(0 ? (int volatile*)0 : (void const*)0, void volatile const*: (void)0); } + +// Ensure we compute the correct common type for extension types as well. +void GH69008(void) { + typedef float mat4 __attribute((matrix_type(4, 4))); + typedef float mat5 __attribute((matrix_type(5, 5))); + + mat4 transform; + (void)(1 ? transform : transform); // ok + + mat5 other_transform; + (void)(1 ? other_transform : transform); // expected-error {{incompatible operand types ('mat5' (aka 'float __attribute__((matrix_type(5, 5)))') and 'mat4' (aka 'float __attribute__((matrix_type(4, 4)))'))}} +} From 6620376270165688cf3cefe56ae27aaa6ec06675 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 14 Oct 2023 08:12:46 -0700 Subject: [PATCH 136/720] [RISCV] Add CompressPat for c.beqz/bnez with X0 in the first operand. (#69042) --- llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 6 +++++ llvm/test/MC/RISCV/compress-rv32i.s | 28 ++++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index aff6e77e0cfc4..07137031d9fc7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -956,8 +956,14 @@ def : CompressPat<(JAL X0, simm12_lsb0:$offset), (C_J simm12_lsb0:$offset)>; def : CompressPat<(BEQ GPRC:$rs1, X0, simm9_lsb0:$imm), (C_BEQZ GPRC:$rs1, simm9_lsb0:$imm)>; +let isCompressOnly = true in +def : CompressPat<(BEQ X0, GPRC:$rs1, simm9_lsb0:$imm), + (C_BEQZ GPRC:$rs1, simm9_lsb0:$imm)>; def : CompressPat<(BNE GPRC:$rs1, X0, simm9_lsb0:$imm), (C_BNEZ GPRC:$rs1, simm9_lsb0:$imm)>; +let isCompressOnly = true in +def : CompressPat<(BNE X0, GPRC:$rs1, simm9_lsb0:$imm), + (C_BNEZ GPRC:$rs1, simm9_lsb0:$imm)>; } // Predicates = [HasStdExtCOrZca] // Quadrant 2 diff --git a/llvm/test/MC/RISCV/compress-rv32i.s 
b/llvm/test/MC/RISCV/compress-rv32i.s index 7869481bb78d6..165defc3435fc 100644 --- a/llvm/test/MC/RISCV/compress-rv32i.s +++ b/llvm/test/MC/RISCV/compress-rv32i.s @@ -154,16 +154,36 @@ jal zero, -2048 # CHECK: # encoding: [0x01,0xd0] beq s0, zero, -256 +# CHECK-BYTES: 01 d0 +# CHECK-ALIASASM: beqz s0, -256 +# CHECK-ALIASOBJ32: beqz s0, 0xffffff2a +# CHECK-ALIASOBJ64: beqz s0, 0xffffffffffffff2a +# CHECK-INSTASM: c.beqz s0, -256 +# CHECK-INSTOBJ32: c.beqz s0, 0xffffff2a +# CHECK-INSTOBJ64: c.beqz s0, 0xffffffffffffff2a +# CHECK: # encoding: [0x01,0xd0] +beq zero, s0, -256 + # CHECK-BYTES: 7d ec # CHECK-ALIASASM: bnez s0, 254 -# CHECK-ALIASOBJ32: bnez s0, 0x128 -# CHECK-ALIASOBJ64: bnez s0, 0x128 +# CHECK-ALIASOBJ32: bnez s0, 0x12a +# CHECK-ALIASOBJ64: bnez s0, 0x12a # CHECK-INSTASM: c.bnez s0, 254 -# CHECK-INSTOBJ32: c.bnez s0, 0x128 -# CHECK-INSTOBJ64: c.bnez s0, 0x128 +# CHECK-INSTOBJ32: c.bnez s0, 0x12a +# CHECK-INSTOBJ64: c.bnez s0, 0x12a # CHECK: # encoding: [0x7d,0xec] bne s0, zero, 254 +# CHECK-BYTES: 7d ec +# CHECK-ALIASASM: bnez s0, 254 +# CHECK-ALIASOBJ32: bnez s0, 0x12c +# CHECK-ALIASOBJ64: bnez s0, 0x12c +# CHECK-INSTASM: c.bnez s0, 254 +# CHECK-INSTOBJ32: c.bnez s0, 0x12c +# CHECK-INSTOBJ64: c.bnez s0, 0x12c +# CHECK: # encoding: [0x7d,0xec] +bne zero, s0, 254 + # CHECK-BYTES: 7e 04 # CHECK-ALIAS: slli s0, s0, 31 # CHECK-INST: c.slli s0, 31 From 649c2f6c1081a0706963a5d09478223a7faaa504 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Sat, 14 Oct 2023 11:14:34 -0400 Subject: [PATCH 137/720] [mlir][mlir-query] Fix shared build. 
NFC --- mlir/lib/Query/Matcher/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlir/lib/Query/Matcher/CMakeLists.txt b/mlir/lib/Query/Matcher/CMakeLists.txt index 6afd24722bb70..3adff9f99243f 100644 --- a/mlir/lib/Query/Matcher/CMakeLists.txt +++ b/mlir/lib/Query/Matcher/CMakeLists.txt @@ -7,4 +7,8 @@ add_mlir_library(MLIRQueryMatcher ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Query/Matcher + + LINK_LIBS PUBLIC + MLIRIR + MLIRParser ) From 311bc6683deea98b09f605a0fb82f52018eb8c30 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Sat, 14 Oct 2023 16:35:03 +0100 Subject: [PATCH 138/720] [AArch64][compiler-rt] Only build SME ABI routines when compiler supports asm. (#68991) This also adds the .variant_pcs directive to some functions from which it was previously missing. --- compiler-rt/lib/builtins/CMakeLists.txt | 9 ++-- compiler-rt/lib/builtins/aarch64/sme-abi.S | 59 +++++++--------------- 2 files changed, 25 insertions(+), 43 deletions(-) diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index b1863746a57e7..753d08273ea54 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -551,10 +551,14 @@ set(aarch64_SOURCES ${GENERIC_SOURCES} cpu_model.c aarch64/fp_mode.c - aarch64/sme-abi.S - aarch64/sme-abi-init.c ) +if(COMPILER_RT_HAS_ASM_SME) + list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c) +else() + message(STATUS "AArch64 SME ABI routines disabled") +endif() + # Generate outline atomics helpers from lse.S base set(OA_HELPERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/outline_atomic_helpers.dir") file(MAKE_DIRECTORY "${OA_HELPERS_DIR}") @@ -782,7 +786,6 @@ else () endif() append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS) - append_list_if(COMPILER_RT_HAS_ASM_SME HAS_ASM_SME BUILTIN_DEFS) foreach (arch ${BUILTIN_SUPPORTED_ARCH}) if (CAN_TARGET_${arch}) diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S 
b/compiler-rt/lib/builtins/aarch64/sme-abi.S index 207810b2e2521..b3612c68066f2 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi.S +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -8,29 +8,6 @@ #include "../assembly.h" -#ifdef HAS_ASM_SME -#define ARCH armv9-a+sme -#define SMSTOP_SM smstop sm -#define SMSTOP_ZA smstop za -#define REG_TPIDR2_EL0 TPIDR2_EL0 -#define REG_SVCR SVCR -#define ADDSVL_X16_X16_1 addsvl x16, x16, #1 -#define LDR_ZA_W15_0_X16 ldr za[w15,0], [x16] -#define STR_ZA_W15_0_X16 str za[w15,0], [x16] -#define CNTD_X0 cntd x0 -#define CFI_OFFSET_VG_MINUS_16 .cfi_offset vg, -16 -#else -#define ARCH armv8-a -#define SMSTOP_SM .inst 0xd503427f -#define SMSTOP_ZA .inst 0xd503447f -#define REG_TPIDR2_EL0 S3_3_C13_C0_5 -#define REG_SVCR S3_3_C4_C2_2 -#define ADDSVL_X16_X16_1 .inst 0x04305830 -#define LDR_ZA_W15_0_X16 .inst 0xe1006200 -#define STR_ZA_W15_0_X16 .inst 0xe1206200 -#define CNTD_X0 .inst 0x04e0e3e0 -#define CFI_OFFSET_VG_MINUS_16 .cfi_escape 0x10, 0x2e, 0x03, 0x11, 0x70, 0x22 // $vg @ cfa - 16 -#endif #if !defined(__APPLE__) #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) @@ -42,7 +19,7 @@ #define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff #endif -.arch ARCH +.arch armv9-a+sme // Utility function which calls a system's abort() routine. Because the function // is streaming-compatible it should disable streaming-SVE mode before calling @@ -50,19 +27,19 @@ // because the function does not return. DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort) .cfi_startproc - .variant_pcs SYMBOL_NAME(do_abort) + .variant_pcs SYMBOL_NAME(do_abort) stp x29, x30, [sp, #-32]! 
- CNTD_X0 + cntd x0 // Store VG to a stack location that we describe with .cfi_offset str x0, [sp, #16] .cfi_def_cfa_offset 32 .cfi_offset w30, -24 .cfi_offset w29, -32 - CFI_OFFSET_VG_MINUS_16 + .cfi_offset vg, -16 bl __arm_sme_state tbz x0, #0, 2f 1: - SMSTOP_SM + smstop sm 2: // We can't make this into a tail-call because the unwinder would // need to restore the value of VG. @@ -74,7 +51,7 @@ END_COMPILERRT_FUNCTION(do_abort) // that is set as part of the compiler-rt startup code. // __aarch64_has_sme_and_tpidr2_el0 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) - .variant_pcs __arm_sme_state + .variant_pcs __arm_sme_state mov x0, xzr mov x1, xzr @@ -83,18 +60,18 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) cbz w16, 1f 0: orr x0, x0, #0xC000000000000000 - mrs x16, REG_SVCR + mrs x16, SVCR bfxil x0, x16, #0, #2 - mrs x1, REG_TPIDR2_EL0 + mrs x1, TPIDR2_EL0 1: ret END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) - .variant_pcs __arm_tpidr2_restore + .variant_pcs __arm_tpidr2_restore // If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific // manner. - mrs x14, REG_TPIDR2_EL0 + mrs x14, TPIDR2_EL0 cbnz x14, 2f // If any of the reserved bytes in the first 16 bytes of BLK are nonzero, @@ -114,8 +91,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) mov x15, xzr 0: - LDR_ZA_W15_0_X16 - ADDSVL_X16_X16_1 + ldr za[w15,0], [x16] + addsvl x16, x16, #1 add x15, x15, #1 cmp x14, x15 b.ne 0b @@ -126,6 +103,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) + .variant_pcs __arm_tpidr2_restore // If the current thread does not have access to TPIDR2_EL0, the subroutine // does nothing. 
adrp x14, TPIDR2_SYMBOL @@ -133,7 +111,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) cbz w14, 1f // If TPIDR2_EL0 is null, the subroutine does nothing. - mrs x16, REG_TPIDR2_EL0 + mrs x16, TPIDR2_EL0 cbz x16, 1f // If any of the reserved bytes in the first 16 bytes of the TPIDR2 block are @@ -153,8 +131,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) mov x15, xzr 0: - STR_ZA_W15_0_X16 - ADDSVL_X16_X16_1 + str za[w15,0], [x16] + addsvl x16, x16, #1 add x15, x15, #1 cmp x14, x15 b.ne 0b @@ -165,6 +143,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) + .variant_pcs __arm_tpidr2_restore // If the current thread does not have access to SME, the subroutine does // nothing. adrp x14, TPIDR2_SYMBOL @@ -182,10 +161,10 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) bl __arm_tpidr2_save // * Set TPIDR2_EL0 to null. - msr REG_TPIDR2_EL0, xzr + msr TPIDR2_EL0, xzr // * Set PSTATE.ZA to 0. 
- SMSTOP_ZA + smstop za .cfi_def_cfa wsp, 16 ldp x29, x30, [sp], #16 From f4a0cb5c95939abd99fc2a7f5ba0a9febfffd78a Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Sat, 14 Oct 2023 15:35:13 +0000 Subject: [PATCH 139/720] [gn build] Port 311bc6683dee --- llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 0649daf46b927..d932d2db1c98a 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -473,7 +473,6 @@ static_library("builtins") { sources -= [ "fp_mode.c" ] sources += [ "aarch64/fp_mode.c", - "aarch64/sme-abi-init.c", "aarch64/sme-abi.S", "cpu_model.c", ] From c442d20e23d739ec89c719cb7b96c3623a58126d Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Sat, 14 Oct 2023 15:35:14 +0000 Subject: [PATCH 140/720] [gn build] Port f445be9790f9 --- llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index d932d2db1c98a..303a6c29d7b91 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -473,7 +473,6 @@ static_library("builtins") { sources -= [ "fp_mode.c" ] sources += [ "aarch64/fp_mode.c", - "aarch64/sme-abi.S", "cpu_model.c", ] if (current_os == "mingw") { From 0ad92c0cbb34a6e24a9a32f03f3ddeb2114b378e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Sat, 14 Oct 2023 18:38:18 +0200 Subject: [PATCH 141/720] [StatepointLowering] Take return attributes of `gc.result` into account (#68439) The current lowering of statepoints does not take into account return attributes present on the `gc.result` leading to different code being generated than 
if one were to not use statepoints. These return attributes can affect the ABI which is why it is important that they are applied in the lowering. --- llvm/include/llvm/CodeGen/TargetLowering.h | 8 ++++++- .../SelectionDAG/SelectionDAGBuilder.cpp | 7 ++++--- .../SelectionDAG/SelectionDAGBuilder.h | 3 ++- .../SelectionDAG/StatepointLowering.cpp | 12 +++++++---- .../AArch64/statepoint-call-lowering.ll | 3 --- .../CodeGen/X86/statepoint-call-lowering.ll | 21 ++++++++++++++++--- 6 files changed, 39 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 187e000d0272d..da92f7d99df43 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4397,8 +4397,14 @@ class TargetLowering : public TargetLoweringBase { } CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, - SDValue Target, ArgListTy &&ArgsList) { + SDValue Target, ArgListTy &&ArgsList, + AttributeSet ResultAttrs = {}) { RetTy = ResultType; + IsInReg = ResultAttrs.hasAttribute(Attribute::InReg); + RetSExt = ResultAttrs.hasAttribute(Attribute::SExt); + RetZExt = ResultAttrs.hasAttribute(Attribute::ZExt); + NoMerge = ResultAttrs.hasAttribute(Attribute::NoMerge); + Callee = Target; CallConv = CC; NumFixedArgs = ArgsList.size(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c5fd56795a520..4bb0ba6f08310 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9728,7 +9728,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, void SelectionDAGBuilder::populateCallLoweringInfo( TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, - bool IsPatchPoint) { + AttributeSet RetAttrs, bool IsPatchPoint) { TargetLowering::ArgListTy Args; 
Args.reserve(NumArgs); @@ -9749,7 +9749,8 @@ void SelectionDAGBuilder::populateCallLoweringInfo( CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) - .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args)) + .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args), + RetAttrs) .setDiscardResult(Call->use_empty()) .setIsPatchPoint(IsPatchPoint) .setIsPreallocated( @@ -9898,7 +9899,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, TargetLowering::CallLoweringInfo CLI(DAG); populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee, - ReturnTy, true); + ReturnTy, CB.getAttributes().getRetAttrs(), true); std::pair Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ec23445b01640..a97884f0efb9a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -426,7 +426,8 @@ class SelectionDAGBuilder { void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - Type *ReturnTy, bool IsPatchPoint); + Type *ReturnTy, AttributeSet RetAttrs, + bool IsPatchPoint); std::pair lowerInvokable(TargetLowering::CallLoweringInfo &CLI, diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index f313d60424c23..cf32350036d41 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -1033,10 +1033,16 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, ActualCallee = Callee; } + const auto GCResultLocality = getGCResultLocality(I); + AttributeSet retAttrs; + if (GCResultLocality.first) + retAttrs = GCResultLocality.first->getAttributes().getRetAttrs(); + StatepointLoweringInfo SI(DAG); 
populateCallLoweringInfo(SI.CLI, &I, GCStatepointInst::CallArgsBeginPos, I.getNumCallArgs(), ActualCallee, - I.getActualReturnType(), false /* IsPatchPoint */); + I.getActualReturnType(), retAttrs, + /*IsPatchPoint=*/false); // There may be duplication in the gc.relocate list; such as two copies of // each relocation on normal and exceptional path for an invoke. We only @@ -1092,8 +1098,6 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, SDValue ReturnValue = LowerAsSTATEPOINT(SI); // Export the result value if needed - const auto GCResultLocality = getGCResultLocality(I); - if (!GCResultLocality.first && !GCResultLocality.second) { // The return value is not needed, just generate a poison value. // Note: This covers the void return case. @@ -1138,7 +1142,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( populateCallLoweringInfo( SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee, ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(), - false); + Call->getAttributes().getRetAttrs(), /*IsPatchPoint=*/false); if (!VarArgDisallowed) SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg(); diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll index 6326d3db9afb8..9619895c450ca 100644 --- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -23,7 +23,6 @@ define i1 @test_i1_return() gc "statepoint-example" { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl return_i1 ; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: and w0, w0, #0x1 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret ; This is just checking that a i1 gets lowered normally when there's no extra @@ -106,7 +105,6 @@ define i1 @test_relocate(ptr addrspace(1) %a) gc "statepoint-example" { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl return_i1 ; CHECK-NEXT: .Ltmp5: -; CHECK-NEXT: and w0, w0, #0x1 ; 
CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret ; Check that an ununsed relocate has no code-generation impact @@ -145,7 +143,6 @@ define i1 @test_i1_return_patchable() gc "statepoint-example" { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: nop ; CHECK-NEXT: .Ltmp7: -; CHECK-NEXT: and w0, w0, #0x1 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret ; A patchable variant of test_i1_return diff --git a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll index 66088046ef5ec..758cb8b7b63d5 100644 --- a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll @@ -247,8 +247,6 @@ define i8 @test_signext_return(ptr) gc "statepoint-example" { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: callq signext_return_i1@PLT ; CHECK-NEXT: .Ltmp10: -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: negb %al ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -266,7 +264,6 @@ define i8 @test_zeroext_return() gc "statepoint-example" { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: callq return_i1@PLT ; CHECK-NEXT: .Ltmp11: -; CHECK-NEXT: andb $1, %al ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -277,6 +274,24 @@ entry: ret i8 %ext } +define signext i1 @test_noext_signext_return() gc "statepoint-example" { +; CHECK-LABEL: test_noext_signext_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq return_i1@PLT +; CHECK-NEXT: .Ltmp12: +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: negb %al +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) + %call1 = call i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call1 +} + declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...) declare i1 @llvm.experimental.gc.result.i1(token) From 80737d2ddf05507d96cdd723fb33a6e44ac72a48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Sat, 14 Oct 2023 17:42:27 +0200 Subject: [PATCH 142/720] [clang][Interp][NFC] Pass PrimType to visitZeroInitializer() This fixes an old FIXME comment. Almost all callers already classify() the type anyway, so just pass the result of that to visitZeroInitializer(). --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 21 +++++++++------------ clang/lib/AST/Interp/ByteCodeExprGen.h | 2 +- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 71aac8c6245c5..bda9cf1500804 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -493,8 +493,8 @@ template bool ByteCodeExprGen::VisitImplicitValueInitExpr(const ImplicitValueInitExpr *E) { QualType QT = E->getType(); - if (classify(QT)) - return this->visitZeroInitializer(QT, E); + if (std::optional T = classify(QT)) + return this->visitZeroInitializer(*T, QT, E); if (QT->isRecordType()) return false; @@ -510,7 +510,7 @@ bool ByteCodeExprGen::VisitImplicitValueInitExpr(const ImplicitValueIni // since we memset our Block*s to 0 and so we have the desired value // without this. 
for (size_t I = 0; I != NumElems; ++I) { - if (!this->visitZeroInitializer(CAT->getElementType(), E)) + if (!this->visitZeroInitializer(*ElemT, CAT->getElementType(), E)) return false; if (!this->emitInitElem(*ElemT, I, E)) return false; @@ -620,7 +620,7 @@ bool ByteCodeExprGen::VisitInitListExpr(const InitListExpr *E) { if (std::optional T = classify(E->getType())) { assert(!DiscardResult); if (E->getNumInits() == 0) - return this->visitZeroInitializer(E->getType(), E); + return this->visitZeroInitializer(*T, E->getType(), E); assert(E->getNumInits() == 1); return this->delegate(E->inits()[0]); } @@ -1560,7 +1560,8 @@ bool ByteCodeExprGen::VisitOffsetOfExpr(const OffsetOfExpr *E) { template bool ByteCodeExprGen::VisitCXXScalarValueInitExpr( const CXXScalarValueInitExpr *E) { - return this->visitZeroInitializer(E->getType(), E); + return this->visitZeroInitializer(classifyPrim(E->getType()), E->getType(), + E); } template bool ByteCodeExprGen::discard(const Expr *E) { @@ -1648,12 +1649,8 @@ bool ByteCodeExprGen::visitBool(const Expr *E) { } template -bool ByteCodeExprGen::visitZeroInitializer(QualType QT, +bool ByteCodeExprGen::visitZeroInitializer(PrimType T, QualType QT, const Expr *E) { - // FIXME: We need the QualType to get the float semantics, but that means we - // classify it over and over again in array situations. 
- PrimType T = classifyPrim(QT); - switch (T) { case PT_Bool: return this->emitZeroBool(E); @@ -1699,7 +1696,7 @@ bool ByteCodeExprGen::visitZeroRecordInitializer(const Record *R, if (D->isPrimitive()) { QualType QT = D->getType(); PrimType T = classifyPrim(D->getType()); - if (!this->visitZeroInitializer(QT, E)) + if (!this->visitZeroInitializer(T, QT, E)) return false; if (!this->emitInitField(T, Field.Offset, E)) return false; @@ -1716,7 +1713,7 @@ bool ByteCodeExprGen::visitZeroRecordInitializer(const Record *R, QualType ET = D->getElemQualType(); PrimType T = classifyPrim(ET); for (uint32_t I = 0, N = D->getNumElems(); I != N; ++I) { - if (!this->visitZeroInitializer(ET, E)) + if (!this->visitZeroInitializer(T, ET, E)) return false; if (!this->emitInitElem(T, I, E)) return false; diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h index 7cfe4d9251c5f..2049dab140eaa 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.h +++ b/clang/lib/AST/Interp/ByteCodeExprGen.h @@ -222,7 +222,7 @@ class ByteCodeExprGen : public ConstStmtVisitor, bool>, friend class SourceLocScope; /// Emits a zero initializer. - bool visitZeroInitializer(QualType QT, const Expr *E); + bool visitZeroInitializer(PrimType T, QualType QT, const Expr *E); bool visitZeroRecordInitializer(const Record *R, const Expr *E); enum class DerefKind { From 8dd3bc18081657fee2352cf5b1c6abacb18fcc84 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 14 Oct 2023 10:51:15 -0700 Subject: [PATCH 143/720] [Support] Remove the migration helpers to llvm::endianness I've migrated all known uses of llvm::support::endianness to llvm::endianness. This patch removes the migration helpers. 
--- llvm/include/llvm/Support/Endian.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/llvm/include/llvm/Support/Endian.h b/llvm/include/llvm/Support/Endian.h index d4fc6b59e252f..4c0405cf1e2f6 100644 --- a/llvm/include/llvm/Support/Endian.h +++ b/llvm/include/llvm/Support/Endian.h @@ -25,13 +25,6 @@ namespace llvm { namespace support { -// TODO: Remove the following once we are done migrating to llvm::endianness, -// llvm::endianness::big, etc. -using endianness = llvm::endianness; -constexpr llvm::endianness big = llvm::endianness::big; -constexpr llvm::endianness little = llvm::endianness::little; -constexpr llvm::endianness native = llvm::endianness::native; - // These are named values for common alignments. enum {aligned = 0, unaligned = 1}; @@ -47,10 +40,6 @@ struct PickAlignment { namespace endian { -LLVM_DEPRECATED("Use llvm::endianness::native instead", - "llvm::endianness::native") -constexpr endianness system_endianness() { return llvm::endianness::native; } - template [[nodiscard]] inline value_type byte_swap(value_type value, endianness endian) { if (endian != llvm::endianness::native) From 0603737ac0f73ff33326d0274b3152cc5a81e1ec Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 14 Oct 2023 12:06:59 -0700 Subject: [PATCH 144/720] [llvm-remarkutil] Fix issues after #66214 and its fixups Don't use reserved identifier (_GroupBy) Fix GCC 7.4/7.5 builds (return Filter => return std::move(Filter)) Remove trailing spaces --- llvm/docs/CommandGuide/llvm-remarkutil.rst | 22 ++++++++--------- .../Inputs/annotation-count-with-dbg-loc.yaml | 2 +- .../Inputs/annotation-count.yaml | 4 ++-- .../count/Inputs/remark-count-by.yaml | 6 ++--- .../count/Inputs/remark-filter-by.yaml | 6 ++--- .../count/Inputs/remark-group-by.yaml | 12 +++++----- .../llvm-remarkutil/count/count-by-keys.test | 2 +- .../count/count-by-remark.test | 2 +- .../count/filter-by-pass-name.test | 4 ++-- .../count/filter-by-remark-name.test | 4 ++-- 
.../llvm-remarkutil/count/filter-by-type.test | 2 +- .../count/group-by-function.test | 2 +- llvm/tools/llvm-remarkutil/RemarkCounter.cpp | 9 ++++--- llvm/tools/llvm-remarkutil/RemarkCounter.h | 24 +++++++++---------- 14 files changed, 50 insertions(+), 51 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-remarkutil.rst b/llvm/docs/CommandGuide/llvm-remarkutil.rst index 2e4953eb188ae..6fd739e844c4a 100644 --- a/llvm/docs/CommandGuide/llvm-remarkutil.rst +++ b/llvm/docs/CommandGuide/llvm-remarkutil.rst @@ -75,7 +75,7 @@ CSV format is as follows: Function,InstructionCount foo,123 -if `--use-debug-loc` is passed then the CSV will include the source path, line number and column. +if `--use-debug-loc` is passed then the CSV will include the source path, line number and column. :: Source,Function,InstructionCount @@ -104,7 +104,7 @@ CSV format is as follows: Function,Count foo,123 -if `--use-debug-loc` is passed then the CSV will include the source path, line number and column. +if `--use-debug-loc` is passed then the CSV will include the source path, line number and column. :: Source,Function,Count @@ -122,7 +122,7 @@ USAGE: :program:`llvm-remarkutil` count [*options*] Summary ^^^^^^^ -:program:`llvm-remarkutil count` counts `remarks ` based on specified properties. +:program:`llvm-remarkutil count` counts `remarks ` based on specified properties. By default the tool counts remarks based on how many occour in a source file or function or total for the generated remark file. The tool also supports collecting count based on specific remark arguments. The specified arguments should have an integer value to be able to report a count. @@ -143,15 +143,15 @@ OPTIONS .. option:: --group-by= group count of remarks by property. - * ``source``: Count will be collected per source path. Remarks with no debug location will not be counted. + * ``source``: Count will be collected per source path. Remarks with no debug location will not be counted. 
* ``function``: Count is collected per function. - * ``function-with-loc``: Count is collected per function per source. Remarks with no debug location will not be counted. + * ``function-with-loc``: Count is collected per function per source. Remarks with no debug location will not be counted. * ``Total``: Report a count for the provided remark file. .. option:: --args[=arguments] If `count-by` is set to `arg` this flag can be used to collect from specified remark arguments represented as a comma seperated string. The arguments must have a numeral value to be able to count remarks by - + .. option:: --rargs[=arguments] If `count-by` is set to `arg` this flag can be used to collect from specified remark arguments using regular expression. The arguments must have a numeral value to be able to count remarks by @@ -177,12 +177,12 @@ OPTIONS .. option:: --remark-type= Filter remarks by type with the following options. * ``unknown`` - * ``passed`` - * ``missed`` - * ``analysis`` + * ``passed`` + * ``missed`` + * ``analysis`` * ``analysis-fp-commute`` - * ``analysis-aliasing`` - * ``failure`` + * ``analysis-aliasing`` + * ``failure`` .. 
_size-diff_subcommand: diff --git a/llvm/test/tools/llvm-remarkutil/Inputs/annotation-count-with-dbg-loc.yaml b/llvm/test/tools/llvm-remarkutil/Inputs/annotation-count-with-dbg-loc.yaml index 6262ee262f499..ebdd722774d4d 100644 --- a/llvm/test/tools/llvm-remarkutil/Inputs/annotation-count-with-dbg-loc.yaml +++ b/llvm/test/tools/llvm-remarkutil/Inputs/annotation-count-with-dbg-loc.yaml @@ -12,7 +12,7 @@ Args: Pass: annotation-remarks Name: AnnotationSummary DebugLoc: { File: path/to/anno2.c, Line: 1, Column: 2 } -Function: func2 +Function: func2 Args: - String: 'Annotated ' - count: '2' diff --git a/llvm/test/tools/llvm-remarkutil/Inputs/annotation-count.yaml b/llvm/test/tools/llvm-remarkutil/Inputs/annotation-count.yaml index f29808a7772cf..9b428f013c00c 100644 --- a/llvm/test/tools/llvm-remarkutil/Inputs/annotation-count.yaml +++ b/llvm/test/tools/llvm-remarkutil/Inputs/annotation-count.yaml @@ -1,7 +1,7 @@ --- !Analysis Pass: annotation-remarks Name: AnnotationSummary -Function: func1 +Function: func1 Args: - String: 'Annotated ' - count: '1' @@ -10,7 +10,7 @@ Args: --- !Analysis Pass: annotation-remarks Name: AnnotationSummary -Function: func2 +Function: func2 Args: - String: 'Annotated ' - count: '2' diff --git a/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-count-by.yaml b/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-count-by.yaml index 3bd0783b7a0a1..7d1b76eaf0ada 100644 --- a/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-count-by.yaml +++ b/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-count-by.yaml @@ -1,6 +1,6 @@ --- !Analysis Pass: generic-remarks-pass -Name: Remark +Name: Remark DebugLoc: { File: path/to/anno.c, Line: 1, Column: 2 } Function: func1 Args: @@ -23,7 +23,7 @@ Args: - type: remark --- !Analysis Pass: generic-remarks-pass -Name: Remark3 +Name: Remark3 DebugLoc: { File: path/to/anno.c, Line: 1, Column: 2 } Function: func1 Args: @@ -32,7 +32,7 @@ Args: - type: remark --- !Analysis Pass: generic-remarks-pass -Name: Remark 
+Name: Remark DebugLoc: { File: path/to/anno.c, Line: 1, Column: 2 } Function: func2 Args: diff --git a/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-filter-by.yaml b/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-filter-by.yaml index e9267bd940484..b4318370b2953 100644 --- a/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-filter-by.yaml +++ b/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-filter-by.yaml @@ -1,6 +1,6 @@ --- !Analysis Pass: generic-remarks-pass -Name: Remark +Name: Remark DebugLoc: { File: path/to/anno2.c, Line: 1, Column: 2 } Function: func1 Args: @@ -20,7 +20,7 @@ Args: - type: remark --- !Missed Pass: generic-remarks-pass -Name: Remark3 +Name: Remark3 DebugLoc: { File: path/to/anno.c, Line: 1, Column: 2 } Function: func1 Args: @@ -29,7 +29,7 @@ Args: - type: remark --- !Passed Pass: generic-remarks-pass -Name: Remark +Name: Remark DebugLoc: { File: path/to/anno.c, Line: 1, Column: 2 } Function: func1 Args: diff --git a/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-group-by.yaml b/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-group-by.yaml index 5f9222214f2c7..a61870a87e5e2 100644 --- a/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-group-by.yaml +++ b/llvm/test/tools/llvm-remarkutil/count/Inputs/remark-group-by.yaml @@ -1,6 +1,6 @@ --- !Analysis Pass: generic-remarks-pass -Name: Remark +Name: Remark DebugLoc: { File: path/to/anno.c, Line: 1, Column: 2 } Function: func1 Args: @@ -9,7 +9,7 @@ Args: - type: remark --- !Missed Pass: generic-remarks-pass -Name: Remark +Name: Remark DebugLoc: { File: path/to/anno.c, Line: 1, Column: 2 } Function: func1 Args: @@ -18,7 +18,7 @@ Args: - type: remark --- !Passed Pass: generic-remarks-pass -Name: Remark +Name: Remark DebugLoc: { File: path/to/anno.c, Line: 1, Column: 2 } Function: func2 Args: @@ -27,7 +27,7 @@ Args: - type: remark --- !Analysis Pass: generic-remarks-pass2 -Name: Remark +Name: Remark DebugLoc: { File: path/to/anno3.c, Line: 1, Column: 2 } Function: func1 
Args: @@ -36,7 +36,7 @@ Args: - type: remark --- !Analysis Pass: generic-remarks-pass3 -Name: Remark +Name: Remark DebugLoc: { File: path/to/anno.c, Line: 1, Column: 2 } Function: func2 Args: @@ -45,7 +45,7 @@ Args: - type: remark --- !Analysis Pass: generic-remarks-pass4 -Name: Remark +Name: Remark DebugLoc: { File: path/to/anno2.c, Line: 1, Column: 2 } Function: func3 Args: diff --git a/llvm/test/tools/llvm-remarkutil/count/count-by-keys.test b/llvm/test/tools/llvm-remarkutil/count/count-by-keys.test index dc414620c3aa5..c0dfbec501ccd 100644 --- a/llvm/test/tools/llvm-remarkutil/count/count-by-keys.test +++ b/llvm/test/tools/llvm-remarkutil/count/count-by-keys.test @@ -1,7 +1,7 @@ RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=source %p/Inputs/remark-count-by.yaml | FileCheck %s RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function %p/Inputs/remark-count-by.yaml | FileCheck %s --check-prefix=CHECKFUNC RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function-with-loc %p/Inputs/remark-count-by.yaml | FileCheck %s --check-prefix=CHECKFUNCLOC -RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=total %p/Inputs/remark-count-by.yaml | FileCheck %s --check-prefix=CHECKTOTAL +RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=total %p/Inputs/remark-count-by.yaml | FileCheck %s --check-prefix=CHECKTOTAL ; CHECK-LABEL: Source,count1,count2,count3,count4 ; CHECK: path/to/anno.c,3,4,6,4 diff --git a/llvm/test/tools/llvm-remarkutil/count/count-by-remark.test b/llvm/test/tools/llvm-remarkutil/count/count-by-remark.test index b0248b9b6ec71..607ef78669473 100644 --- a/llvm/test/tools/llvm-remarkutil/count/count-by-remark.test +++ b/llvm/test/tools/llvm-remarkutil/count/count-by-remark.test @@ -1,7 +1,7 @@ RUN: llvm-remarkutil count --parser=yaml --count-by=remark-name --group-by=source %p/Inputs/remark-count-by.yaml | FileCheck %s RUN: llvm-remarkutil count --parser=yaml 
--count-by=remark-name --group-by=function %p/Inputs/remark-count-by.yaml | FileCheck %s --check-prefix=CHECKFUNC RUN: llvm-remarkutil count --parser=yaml --count-by=remark-name --group-by=function-with-loc %p/Inputs/remark-count-by.yaml | FileCheck %s --check-prefix=CHECKFUNCLOC -RUN: llvm-remarkutil count --parser=yaml --count-by=remark-name --group-by=total %p/Inputs/remark-count-by.yaml | FileCheck %s --check-prefix=CHECKTOTAL +RUN: llvm-remarkutil count --parser=yaml --count-by=remark-name --group-by=total %p/Inputs/remark-count-by.yaml | FileCheck %s --check-prefix=CHECKTOTAL ; CHECK-LABEL: Source,Count ; CHECK: path/to/anno.c,3 diff --git a/llvm/test/tools/llvm-remarkutil/count/filter-by-pass-name.test b/llvm/test/tools/llvm-remarkutil/count/filter-by-pass-name.test index 481d6fd2f5820..caf630484caf2 100644 --- a/llvm/test/tools/llvm-remarkutil/count/filter-by-pass-name.test +++ b/llvm/test/tools/llvm-remarkutil/count/filter-by-pass-name.test @@ -1,5 +1,5 @@ -RUN: llvm-remarkutil count --parser=yaml --pass-name=generic-remarks-pass %p/Inputs/remark-filter-by.yaml | FileCheck %s -RUN: llvm-remarkutil count --parser=yaml --rpass-name=.* %p/Inputs/remark-filter-by.yaml | FileCheck %s --check-prefix=CHECKALL +RUN: llvm-remarkutil count --parser=yaml --pass-name=generic-remarks-pass %p/Inputs/remark-filter-by.yaml | FileCheck %s +RUN: llvm-remarkutil count --parser=yaml --rpass-name=.* %p/Inputs/remark-filter-by.yaml | FileCheck %s --check-prefix=CHECKALL ; CHECK-LABEL: Source,Count ; CHECK: path/to/anno.c,2 diff --git a/llvm/test/tools/llvm-remarkutil/count/filter-by-remark-name.test b/llvm/test/tools/llvm-remarkutil/count/filter-by-remark-name.test index 20684d57f648c..24d0399e36aef 100644 --- a/llvm/test/tools/llvm-remarkutil/count/filter-by-remark-name.test +++ b/llvm/test/tools/llvm-remarkutil/count/filter-by-remark-name.test @@ -1,5 +1,5 @@ -RUN: llvm-remarkutil count --parser=yaml --remark-name=Remark %p/Inputs/remark-filter-by.yaml | FileCheck %s -RUN: 
llvm-remarkutil count --parser=yaml --rremark-name=R.* %p/Inputs/remark-filter-by.yaml | FileCheck %s --check-prefix=CHECKALL +RUN: llvm-remarkutil count --parser=yaml --remark-name=Remark %p/Inputs/remark-filter-by.yaml | FileCheck %s +RUN: llvm-remarkutil count --parser=yaml --rremark-name=R.* %p/Inputs/remark-filter-by.yaml | FileCheck %s --check-prefix=CHECKALL ; CHECK-LABEL: Source,Count ; CHECK: path/to/anno.c,1 diff --git a/llvm/test/tools/llvm-remarkutil/count/filter-by-type.test b/llvm/test/tools/llvm-remarkutil/count/filter-by-type.test index c392fe43aa199..db4f4610ae559 100644 --- a/llvm/test/tools/llvm-remarkutil/count/filter-by-type.test +++ b/llvm/test/tools/llvm-remarkutil/count/filter-by-type.test @@ -13,4 +13,4 @@ RUN: llvm-remarkutil count --parser=yaml --remark-type=unknown %p/Inputs/remark- ; ANALYSIS: path/to/anno2.c,2 ; UNKNOWN: Source,Count -; UNKNOWN-EMPTY: +; UNKNOWN-EMPTY: diff --git a/llvm/test/tools/llvm-remarkutil/count/group-by-function.test b/llvm/test/tools/llvm-remarkutil/count/group-by-function.test index f3d04bb00c269..67643089d4103 100644 --- a/llvm/test/tools/llvm-remarkutil/count/group-by-function.test +++ b/llvm/test/tools/llvm-remarkutil/count/group-by-function.test @@ -4,4 +4,4 @@ RUN: llvm-remarkutil count --parser=yaml --group-by=function %p/Inputs/remark-gr ; CHECK-LABEL: Function,Count ; CHECK: func1,3 ; CHECK: func2,2 -; CHECK: func3,1 +; CHECK: func3,1 diff --git a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp index 8bde0b8830182..dc0685f342886 100644 --- a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp +++ b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp @@ -166,8 +166,7 @@ Error ArgumentCounter::getAllMatchingArgumentsInRemark( } std::optional Counter::getGroupByKey(const Remark &Remark) { - - switch (_GroupBy) { + switch (Group) { case GroupBy::PER_FUNCTION: return Remark.FunctionName.str(); case GroupBy::TOTAL: @@ -177,7 +176,7 @@ std::optional Counter::getGroupByKey(const 
Remark &Remark) { if (!Remark.Loc.has_value()) return std::nullopt; - if (_GroupBy == GroupBy::PER_FUNCTION_WITH_DEBUG_LOC) + if (Group == GroupBy::PER_FUNCTION_WITH_DEBUG_LOC) return Remark.Loc->SourceFilePath.str() + ":" + Remark.FunctionName.str(); return Remark.Loc->SourceFilePath.str(); } @@ -214,7 +213,7 @@ Error ArgumentCounter::print(StringRef OutputFileName) { return MaybeOF.takeError(); auto OF = std::move(*MaybeOF); - OF->os() << groupByToStr(_GroupBy) << ","; + OF->os() << groupByToStr(Group) << ","; unsigned Idx = 0; for (auto [Key, _] : ArgumentSetIdxMap) { OF->os() << Key; @@ -244,7 +243,7 @@ Error RemarkCounter::print(StringRef OutputFileName) { return MaybeOF.takeError(); auto OF = std::move(*MaybeOF); - OF->os() << groupByToStr(_GroupBy) << "," + OF->os() << groupByToStr(Group) << "," << "Count\n"; for (auto [Key, Count] : CountedByRemarksMap) OF->os() << Key << "," << Count << "\n"; diff --git a/llvm/tools/llvm-remarkutil/RemarkCounter.h b/llvm/tools/llvm-remarkutil/RemarkCounter.h index 54bba8d7cc995..3dd06622bc03f 100644 --- a/llvm/tools/llvm-remarkutil/RemarkCounter.h +++ b/llvm/tools/llvm-remarkutil/RemarkCounter.h @@ -88,7 +88,7 @@ struct Filters { Filter.RemarkTypeFilter = std::move(RemarkTypeFilter); if (auto E = Filter.regexArgumentsValid()) return std::move(E); - return Filter; + return std::move(Filter); } /// Returns true if \p Remark satisfies all the provided filters. bool filterRemark(const Remark &Remark); @@ -110,15 +110,15 @@ inline Error checkRegex(const Regex &Regex) { /// Abstract counter class used to define the general required methods for /// counting a remark. struct Counter { - GroupBy _GroupBy; - Counter(){}; - Counter(enum GroupBy GroupBy) : _GroupBy(GroupBy) {} + GroupBy Group = GroupBy::TOTAL; + Counter() = default; + Counter(enum GroupBy GroupBy) : Group(GroupBy) {} /// Obtain the field for collecting remark info based on how we are /// collecting. 
Remarks are grouped by FunctionName, Source, Source and /// Function or collect by file. std::optional getGroupByKey(const Remark &Remark); - /// Collect count information from \p Remark organized based on \p GroupBy + /// Collect count information from \p Remark organized based on \p Group /// property. virtual void collect(const Remark &) = 0; /// Output the final count to the file \p OutputFileName @@ -158,10 +158,10 @@ struct ArgumentCounter : Counter { /// vector then we need to check that the provided regular expressions are /// valid if not we return an Error. static Expected - createArgumentCounter(enum GroupBy GroupBy, ArrayRef Arguments, + createArgumentCounter(GroupBy Group, ArrayRef Arguments, StringRef Buffer, Filters &Filter) { ArgumentCounter AC; - AC._GroupBy = GroupBy; + AC.Group = Group; for (auto &Arg : Arguments) { if (Arg.IsRegex) { if (auto E = checkRegex(Arg.FilterRE)) @@ -178,7 +178,7 @@ struct ArgumentCounter : Counter { void collect(const Remark &) override; /// Print a CSV table consisting of an index which is specified by \p - /// `GroupBy` and can be a function name, source file name or function name + /// `Group` and can be a function name, source file name or function name /// with the full source path and columns of user specified remark arguments /// to collect the count for. Error print(StringRef OutputFileName) override; @@ -194,19 +194,19 @@ struct ArgumentCounter : Counter { }; /// Collect remarks based by counting the existance of individual remarks. The -/// reported table will be structured based on the provided \p GroupBy argument +/// reported table will be structured based on the provided \p Group argument /// by reporting count for functions, source or total count for the provided /// remark file. 
struct RemarkCounter : Counter { std::map CountedByRemarksMap; - RemarkCounter(enum GroupBy GroupBy) : Counter(GroupBy) {} + RemarkCounter(GroupBy Group) : Counter(Group) {} - /// Advance the internal map count broken by \p GroupBy when + /// Advance the internal map count broken by \p Group when /// seeing \p Remark. void collect(const Remark &) override; /// Print a CSV table consisting of an index which is specified by \p - /// `GroupBy` and can be a function name, source file name or function name + /// `Group` and can be a function name, source file name or function name /// with the full source path and a counts column corresponding to the count /// of each individual remark at th index. Error print(StringRef OutputFileName) override; From bb6a98c8d2beee78ab9dc1a4c81009410e4911a3 Mon Sep 17 00:00:00 2001 From: AMS21 Date: Sat, 14 Oct 2023 22:51:50 +0200 Subject: [PATCH 145/720] [clang-tidy] Ignore unused parameters in `rvalue-reference-param-not-moved check` (#69045) With this patch we no longer issue a warning for unused parameters which are marked as such. 
This fixes #68209 --- .../RvalueReferenceParamNotMovedCheck.cpp | 3 +++ clang-tools-extra/docs/ReleaseNotes.rst | 4 +++ .../rvalue-reference-param-not-moved.rst | 10 ++++++++ .../rvalue-reference-param-not-moved.cpp | 25 +++++++++++++++++++ 4 files changed, 42 insertions(+) diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.cpp index efcaffb45d9ad..88b00dc17470f 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.cpp @@ -84,6 +84,9 @@ void RvalueReferenceParamNotMovedCheck::check( if (IgnoreUnnamedParams && Param->getName().empty()) return; + if (!Param->isUsed() && Param->hasAttr()) + return; + const auto *Function = dyn_cast(Param->getDeclContext()); if (!Function) return; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 03e5dc6f164af..c732d4904df13 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -237,6 +237,10 @@ Changes in existing checks ` check to ignore false-positives in unevaluated context (e.g., ``decltype``, ``sizeof``, ...). +- Improved :doc:`cppcoreguidelines-rvalue-reference-param-not-moved + ` check + to ignore unused parameters when they are marked as unused. + - Improved :doc:`llvm-namespace-comment ` check to provide fixes for ``inline`` namespaces in the same format as :program:`clang-format`. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/rvalue-reference-param-not-moved.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/rvalue-reference-param-not-moved.rst index 9ab4ae7871e46..ffa3a9d61e48e 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/rvalue-reference-param-not-moved.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/rvalue-reference-param-not-moved.rst @@ -18,6 +18,16 @@ Example: std::string Copy(Input); // Oops - forgot to std::move } +Note that parameters that are unused and marked as such will not be diagnosed. + +Example: + +.. code-block:: c++ + + void conditional_use([[maybe_unused]] std::string&& Input) { + // No diagnostic here since Input is unused and marked as such + } + Options ------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/rvalue-reference-param-not-moved.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/rvalue-reference-param-not-moved.cpp index 8f8e272e1e8a9..a9b87567a08cc 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/rvalue-reference-param-not-moved.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/rvalue-reference-param-not-moved.cpp @@ -333,3 +333,28 @@ void instantiate_a_class_template() { AClassTemplate withObjRef(o); withObjRef.never_moves(o); } + +namespace gh68209 +{ + void f1([[maybe_unused]] int&& x) {} + + void f2(__attribute__((unused)) int&& x) {} + + void f3(int&& x) {} + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: rvalue reference parameter 'x' is never moved from inside the function body [cppcoreguidelines-rvalue-reference-param-not-moved] + + template + void f4([[maybe_unused]] T&& x) {} + + template + void f5(__attribute((unused)) T&& x) {} + + template + void f6(T&& x) {} + + void f7([[maybe_unused]] int&& x) { x += 1; } + // CHECK-MESSAGES: :[[@LINE-1]]:34: warning: rvalue reference parameter 'x' is never moved from inside the 
function body [cppcoreguidelines-rvalue-reference-param-not-moved] + + void f8(__attribute__((unused)) int&& x) { x += 1; } + // CHECK-MESSAGES: :[[@LINE-1]]:41: warning: rvalue reference parameter 'x' is never moved from inside the function body [cppcoreguidelines-rvalue-reference-param-not-moved] +} // namespace gh68209 From 4fb49f44fdf558a942de2b0fc81e7f1fdf1c798c Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 14 Oct 2023 14:30:44 -0700 Subject: [PATCH 146/720] [ELF][test] Test relocations referencing symbols relative to sections discarded by /DISCARD/ --- lld/test/ELF/linkerscript/discard-section.s | 23 ++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/lld/test/ELF/linkerscript/discard-section.s b/lld/test/ELF/linkerscript/discard-section.s index fbdff5dfbe849..df73f715494f4 100644 --- a/lld/test/ELF/linkerscript/discard-section.s +++ b/lld/test/ELF/linkerscript/discard-section.s @@ -1,14 +1,23 @@ # REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t -# RUN: echo "SECTIONS { /DISCARD/ : { *(.aaa*) } }" > %t.script -# RUN: ld.lld -o %t1 --script %t.script %t -# RUN: llvm-objdump --section-headers %t1 | FileCheck %s +## Test relocations referencing symbols defined relative to sections discarded by /DISCARD/. 
-# CHECK-NOT: .aaa +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: echo "SECTIONS { /DISCARD/ : { *(.aaa*) } }" > %t.lds +# RUN: ld.lld -T %t.lds %t.o -z undefs -o /dev/null 2>&1 | count 0 +# RUN: ld.lld -T %t.lds %t.o -o /dev/null 2>&1 | count 0 +# RUN: ld.lld -r -T %t.lds %t.o -o /dev/null 2>&1 | count 0 + +.globl _start +_start: .section .aaa,"a" -aab: +.globl global +.weak weak +global: +weak: .quad 0 .section .zzz,"a" - .quad aab + .quad .aaa + .quad global + .quad weak From 557299c9b6464f27968904aad5429cfb1512434e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 14 Oct 2023 14:59:10 -0700 Subject: [PATCH 147/720] [ELF][test] Test relocations referencing weak symbol, which is defined relative to a section discarded by /DISCARD/ --- lld/test/ELF/linkerscript/discard-section.s | 29 +++++++++++++++------ 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/lld/test/ELF/linkerscript/discard-section.s b/lld/test/ELF/linkerscript/discard-section.s index df73f715494f4..9e021ac83f563 100644 --- a/lld/test/ELF/linkerscript/discard-section.s +++ b/lld/test/ELF/linkerscript/discard-section.s @@ -1,23 +1,36 @@ # REQUIRES: x86 ## Test relocations referencing symbols defined relative to sections discarded by /DISCARD/. 
-# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o -# RUN: echo "SECTIONS { /DISCARD/ : { *(.aaa*) } }" > %t.lds -# RUN: ld.lld -T %t.lds %t.o -z undefs -o /dev/null 2>&1 | count 0 -# RUN: ld.lld -T %t.lds %t.o -o /dev/null 2>&1 | count 0 -# RUN: ld.lld -r -T %t.lds %t.o -o /dev/null 2>&1 | count 0 +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 b.s -o b.o +# RUN: ld.lld -T a.lds a.o b.o -z undefs -o /dev/null 2>&1 | count 0 +# RUN: ld.lld -T a.lds a.o b.o -o /dev/null 2>&1 | count 0 +# RUN: ld.lld -r -T a.lds a.o b.o -o /dev/null 2>&1 | count 0 +#--- a.s .globl _start _start: .section .aaa,"a" -.globl global -.weak weak +.globl global, weakref1 +.weak weak, weakref2 global: weak: +weakref1: +weakref2: .quad 0 -.section .zzz,"a" +.section .bbb,"aw" .quad .aaa + +#--- b.s +.weak weakref1, weakref2 +.section .data,"aw" .quad global .quad weak + .quad weakref1 + .quad weakref2 + +#--- a.lds +SECTIONS { /DISCARD/ : { *(.aaa) } } From 2dc6579f6fb12470559a68886c2a4aecaa8495dd Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 8 Jul 2023 14:05:11 -0400 Subject: [PATCH 148/720] clang: Switch SYCL test to generated checks --- clang/test/CodeGenSYCL/function-attrs.cpp | 40 ++++++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/clang/test/CodeGenSYCL/function-attrs.cpp b/clang/test/CodeGenSYCL/function-attrs.cpp index 8f5c0ea5c512c..1606f961f2d39 100644 --- a/clang/test/CodeGenSYCL/function-attrs.cpp +++ b/clang/test/CodeGenSYCL/function-attrs.cpp @@ -1,16 +1,29 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3 // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -disable-llvm-passes \ -// RUN: -triple spir64 -fexceptions -emit-llvm %s -o - | FileCheck %s +// RUN: -triple spir64 -fexceptions -emit-llvm -fno-ident %s -o - | FileCheck %s int foo(); -// CHECK: define dso_local 
spir_func void @_Z3barv() [[BAR:#[0-9]+]] -// CHECK: attributes [[BAR]] = -// CHECK-SAME: convergent -// CHECK-SAME: nounwind +// CHECK-LABEL: define dso_local spir_func void @_Z3barv( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) +// CHECK-NEXT: [[CALL:%.*]] = call spir_func noundef i32 @_Z3foov() #[[ATTR1:[0-9]+]] +// CHECK-NEXT: store i32 [[CALL]], ptr addrspace(4) [[A_ASCAST]], align 4 +// CHECK-NEXT: ret void +// void bar() { int a = foo(); } +// CHECK-LABEL: define dso_local spir_func noundef i32 @_Z3foov( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4) +// CHECK-NEXT: ret i32 1 +// int foo() { return 1; } @@ -20,7 +33,24 @@ __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { kernelFunc(); } +// CHECK-LABEL: define dso_local noundef i32 @main( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4) +// CHECK-NEXT: [[REF_TMP_ASCAST:%.*]] = addrspacecast ptr [[REF_TMP]] to ptr addrspace(4) +// CHECK-NEXT: store i32 0, ptr addrspace(4) [[RETVAL_ASCAST]], align 4 +// CHECK-NEXT: call spir_func void @_Z18kernel_single_taskIZ4mainE11fake_kernelZ4mainEUlvE_EvRKT0_(ptr addrspace(4) noundef align 1 dereferenceable(1) [[REF_TMP_ASCAST]]) #[[ATTR1]] +// CHECK-NEXT: ret i32 0 +// int main() { kernel_single_task([] { bar(); }); return 0; } +//. +// CHECK: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #1 = { convergent nounwind } +//. 
+// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +//. From ab6d5fa3d0643e68d6ec40d9190f20fb14190ed1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 14 Oct 2023 21:00:59 -0700 Subject: [PATCH 149/720] [Sparc] Use isNullConstant (NFC) --- llvm/lib/Target/Sparc/SparcISelLowering.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index b6afb8d5a6de9..4f08014792110 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -2604,9 +2604,8 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, if (LHS.getValueType().isInteger()) { // On V9 processors running in 64-bit mode, if CC compares two `i64`s // and the RHS is zero we might be able to use a specialized branch. - const ConstantSDNode *RHSC = dyn_cast(RHS); - if (is64Bit && isV9 && LHS.getValueType() == MVT::i64 && RHSC && - RHSC->isZero() && !ISD::isUnsignedIntSetCC(CC)) + if (is64Bit && isV9 && LHS.getValueType() == MVT::i64 && + isNullConstant(RHS) && !ISD::isUnsignedIntSetCC(CC)) return DAG.getNode(SPISD::BR_REG, dl, MVT::Other, Chain, Dest, DAG.getConstant(intCondCCodeToRcond(CC), dl, MVT::i32), LHS); From 0d661e965ad1a54e46317f38677bd88875bfcf1d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 14 Oct 2023 22:11:09 -0700 Subject: [PATCH 150/720] [clangd] Use DenseMap::contains (NFC) --- clang-tools-extra/clangd/index/SymbolCollector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index 74aca9b99c8a5..aac6676a995fe 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -893,7 +893,7 @@ void SymbolCollector::finish() { const Symbol *S = Symbols.find(SID); if (!S) continue; - assert(IncludeFiles.find(SID) != IncludeFiles.end()); + 
assert(IncludeFiles.contains(SID)); const auto FID = IncludeFiles.at(SID); // Determine if the FID is #include'd or #import'ed. From 169f60f7c76cb6f7d234ab5dfb2b5e367a35ccbb Mon Sep 17 00:00:00 2001 From: Ben Shi <2283975856@qq.com> Date: Sun, 15 Oct 2023 13:47:56 +0800 Subject: [PATCH 151/720] [clang][driver] Add avr-libc's default linker script to lld (#68507) If `-fuse-ld=lld` is specified but no user linker script is offered, we try to use avr-libc's default one for lld. (not needed for GNU ld) --- clang/lib/Driver/ToolChains/AVR.cpp | 14 ++++++++++-- .../usr/lib/avr/lib/ldscripts/avrtiny.x | 0 .../usr/lib/avr/lib/ldscripts/avrxmega6.x | 0 clang/test/Driver/avr-ld.c | 22 +++++++++++++++++++ 4 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 clang/test/Driver/Inputs/basic_avr_tree/usr/lib/avr/lib/ldscripts/avrtiny.x create mode 100644 clang/test/Driver/Inputs/basic_avr_tree/usr/lib/avr/lib/ldscripts/avrxmega6.x diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index e312fa155e11b..2e46b25aeba75 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -554,8 +554,18 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("--end-group"); - // Add user specified linker script. - Args.AddAllArgs(CmdArgs, options::OPT_T); + // Add avr-libc's linker script to lld by default, if it exists. + if (!Args.hasArg(options::OPT_T) && + Linker.find("lld") != std::string::npos) { + std::string Path(*AVRLibcRoot + "/lib/ldscripts/"); + Path += *FamilyName; + Path += ".x"; + if (llvm::sys::fs::exists(Path)) + CmdArgs.push_back(Args.MakeArgString("-T" + Path)); + } + // Otherwise add user specified linker script to either avr-ld or lld. 
+ else + Args.AddAllArgs(CmdArgs, options::OPT_T); if (Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, true)) CmdArgs.push_back("--relax"); diff --git a/clang/test/Driver/Inputs/basic_avr_tree/usr/lib/avr/lib/ldscripts/avrtiny.x b/clang/test/Driver/Inputs/basic_avr_tree/usr/lib/avr/lib/ldscripts/avrtiny.x new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/basic_avr_tree/usr/lib/avr/lib/ldscripts/avrxmega6.x b/clang/test/Driver/Inputs/basic_avr_tree/usr/lib/avr/lib/ldscripts/avrxmega6.x new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/avr-ld.c b/clang/test/Driver/avr-ld.c index 0f12607fe9d69..3e4114485332f 100644 --- a/clang/test/Driver/avr-ld.c +++ b/clang/test/Driver/avr-ld.c @@ -58,6 +58,28 @@ // LINKS: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin-opt=mcpu=atmega328" // LINKS-NOT: "-plugin-opt=thinlto" +// RUN: %clang -### --target=avr -mmcu=attiny40 -fuse-ld=lld --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKT0 %s +// LINKT0: {{".*lld.*"}} {{.*}} {{"-T.*avrtiny.x"}} +// LINKT0-NOT: "-m + +// RUN: %clang -### --target=avr -mmcu=atxmega384c3 -fuse-ld=lld --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKT1 %s +// LINKT1: {{".*lld.*"}} {{.*}} {{"-T.*avrxmega6.x"}} +// LINKT1-NOT: "-m + +// RUN: %clang -### --target=avr -mmcu=atmega328 -fuse-ld=lld --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKT2 %s +// LINKT2: {{".*lld.*"}} {{.*}} "--start-group" {{.*}} "--end-group" +// LINKT2-NOT: "-T +// LINKT2-NOT: "-m + +// RUN: %clang -### --target=avr -mmcu=attiny40 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKT3 %s +// LINKT3: {{".*ld.*"}} {{.*}} "-mavrtiny" +// LINKT3-NOT: "-T + +// RUN: %clang -### --target=avr -mmcu=attiny40 --sysroot %S/Inputs/basic_avr_tree -fuse-ld=lld -T %S/Inputs/basic_avr_tree/usr/lib/avr/lib/ldscripts/avrxmega6.x %s 2>&1 | 
FileCheck -check-prefix LINKT4 %s +// LINKT4: {{".*lld.*"}} {{.*}} {{"-T.*avrxmega6.x"}} +// LINKT4-NOT: {{"-T.*avrtiny.x"}} +// LINKT4-NOT: "-m + // RUN: %clang -### -r --target=avr -mmcu=atmega328 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck --check-prefix=LINKU %s // LINKU: {{".*ld.*"}} {{.*}} "-r" {{.*}} "-mavr5" // LINKU-NOT: "--gc-sections" From 3c4ecc4628601d07201780ea9ed23770a5a2d86c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Sun, 15 Oct 2023 08:51:33 +0200 Subject: [PATCH 152/720] [clang][Interp][NFC] Refactor VisitImplicitValueInitExpr The FIXME comment here is not really correct. Also, handle the case of non-primitive array element types differently, to reduce indentation. --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index bda9cf1500804..e9e20b222d5d3 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -504,19 +504,13 @@ bool ByteCodeExprGen::VisitImplicitValueInitExpr(const ImplicitValueIni assert(AT); const auto *CAT = cast(AT); size_t NumElems = CAT->getSize().getZExtValue(); + PrimType ElemT = classifyPrim(CAT->getElementType()); - if (std::optional ElemT = classify(CAT->getElementType())) { - // TODO(perf): For int and bool types, we can probably just skip this - // since we memset our Block*s to 0 and so we have the desired value - // without this. 
- for (size_t I = 0; I != NumElems; ++I) { - if (!this->visitZeroInitializer(*ElemT, CAT->getElementType(), E)) - return false; - if (!this->emitInitElem(*ElemT, I, E)) - return false; - } - } else { - assert(false && "default initializer for non-primitive type"); + for (size_t I = 0; I != NumElems; ++I) { + if (!this->visitZeroInitializer(ElemT, CAT->getElementType(), E)) + return false; + if (!this->emitInitElem(ElemT, I, E)) + return false; } return true; From 0187960cdd0cc640317b29a2f25a0c30df3f68ef Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 15 Oct 2023 00:14:14 -0700 Subject: [PATCH 153/720] [Scalar] Use LLVMContext::MD_mem_parallel_loop_access (NFC) --- llvm/lib/Transforms/Scalar/Scalarizer.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 14df394e2b415..111c477337535 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -447,11 +447,9 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; - Module &M = *F.getParent(); - unsigned ParallelLoopAccessMDKind = - M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); DominatorTree *DT = &getAnalysis().getDomTree(); - ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, ScalarizerPassOptions()); + ScalarizerVisitor Impl(LLVMContext::MD_mem_parallel_loop_access, DT, + ScalarizerPassOptions()); return Impl.visit(F); } @@ -1254,11 +1252,8 @@ bool ScalarizerVisitor::finish() { } PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) { - Module &M = *F.getParent(); - unsigned ParallelLoopAccessMDKind = - M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); DominatorTree *DT = &AM.getResult(F); - ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, Options); + ScalarizerVisitor Impl(LLVMContext::MD_mem_parallel_loop_access, DT, Options); bool Changed = 
Impl.visit(F); PreservedAnalyses PA; PA.preserve(); From 88dd9813696e3ac1da705ffa36bd94b0eccbd78c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 15 Oct 2023 00:20:56 -0700 Subject: [PATCH 154/720] [include-cleaner] Remove unused using decls (NFC) Identified with misc-unused-using-decls. --- .../include-cleaner/unittests/LocateSymbolTest.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang-tools-extra/include-cleaner/unittests/LocateSymbolTest.cpp b/clang-tools-extra/include-cleaner/unittests/LocateSymbolTest.cpp index d69e25bf8116d..756757cfd0f09 100644 --- a/clang-tools-extra/include-cleaner/unittests/LocateSymbolTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/LocateSymbolTest.cpp @@ -30,8 +30,6 @@ using testing::ElementsAre; using testing::ElementsAreArray; using testing::Eq; using testing::Field; -using testing::Pair; -using testing::UnorderedElementsAre; // A helper for building ASTs and getting decls out of it by name. Example usage // looks like: From fad99d398a714f2fed18e2e65aef47a9b273f2f7 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Sun, 15 Oct 2023 15:28:30 +0800 Subject: [PATCH 155/720] [clang][Driver] Fix a spot in commit 169f60f7c76cb6f7d234ab5dfb2b5e367a35ccbb My previous commit leads to a failure in 'Builders/ppc64le-lld-multistage-test', as shown at "https://lab.llvm.org/buildbot/#/builders/36/builds/38790". --- clang/lib/Driver/ToolChains/AVR.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index 2e46b25aeba75..27505b9462c44 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -556,7 +556,7 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, // Add avr-libc's linker script to lld by default, if it exists. 
if (!Args.hasArg(options::OPT_T) && - Linker.find("lld") != std::string::npos) { + Linker.find("avr-ld") == std::string::npos) { std::string Path(*AVRLibcRoot + "/lib/ldscripts/"); Path += *FamilyName; Path += ".x"; From e1bb0598b2c0ecb098c7032716e3ae10f10a4da7 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Sun, 15 Oct 2023 17:32:27 +0900 Subject: [PATCH 156/720] [MachineBasicBlock] Fix use after free in SplitCriticalEdge (#68786) Remove use after free when attempting to update SlotIndexes in MachineBasicBlock::SplitCriticalEdge. Use MachineFunction delegate mechanism to capture target specific manipulations of branch instructions and update SlotIndexes. --- llvm/lib/CodeGen/MachineBasicBlock.cpp | 58 ++++++++++++-------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 7d3d8b6fba1b7..14d9bb292ddf2 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1097,6 +1097,30 @@ static bool jumpTableHasOtherUses(const MachineFunction &MF, return false; } +class SlotIndexUpdateDelegate : public MachineFunction::Delegate { +private: + MachineFunction &MF; + SlotIndexes *Indexes; + +public: + SlotIndexUpdateDelegate(MachineFunction &MF, SlotIndexes *Indexes) + : MF(MF), Indexes(Indexes) { + MF.setDelegate(this); + } + + ~SlotIndexUpdateDelegate() { MF.resetDelegate(this); } + + void MF_HandleInsertion(MachineInstr &MI) override { + if (Indexes) + Indexes->insertMachineInstrInMaps(MI); + } + + void MF_HandleRemoval(MachineInstr &MI) override { + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + } +}; + MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( MachineBasicBlock *Succ, Pass &P, std::vector> *LiveInSets) { @@ -1170,51 +1194,23 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( ReplaceUsesOfBlockWith(Succ, NMBB); - // If updateTerminator() removes instructions, we need to remove them from - // 
SlotIndexes. - SmallVector Terminators; - if (Indexes) { - for (MachineInstr &MI : - llvm::make_range(getFirstInstrTerminator(), instr_end())) - Terminators.push_back(&MI); - } - // Since we replaced all uses of Succ with NMBB, that should also be treated // as the fallthrough successor if (Succ == PrevFallthrough) PrevFallthrough = NMBB; - if (!ChangedIndirectJump) + if (!ChangedIndirectJump) { + SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes); updateTerminator(PrevFallthrough); - - if (Indexes) { - SmallVector NewTerminators; - for (MachineInstr &MI : - llvm::make_range(getFirstInstrTerminator(), instr_end())) - NewTerminators.push_back(&MI); - - for (MachineInstr *Terminator : Terminators) { - if (!is_contained(NewTerminators, Terminator)) - Indexes->removeMachineInstrFromMaps(*Terminator); - } } // Insert unconditional "jump Succ" instruction in NMBB if necessary. NMBB->addSuccessor(Succ); if (!NMBB->isLayoutSuccessor(Succ)) { + SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes); SmallVector Cond; const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL); - - if (Indexes) { - for (MachineInstr &MI : NMBB->instrs()) { - // Some instructions may have been moved to NMBB by updateTerminator(), - // so we first remove any instruction that already has an index. - if (Indexes->hasIndex(MI)) - Indexes->removeMachineInstrFromMaps(MI); - Indexes->insertMachineInstrInMaps(MI); - } - } } // Fix PHI nodes in Succ so they refer to NMBB instead of this. From 9451004987e84c2bc2f109dd56ceab3844505a7f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 15 Oct 2023 17:00:50 +0800 Subject: [PATCH 157/720] [InstCombine][TLI] Fix function prototype of `labs` (#69077) `i64 @labs(i32)` is incorrectly recognized as `LibFunc_labs` because type ID `Long` matches both `i32` and `i64`. This PR requires the type of argument to match the return value. Fixes #69059. 
--- llvm/include/llvm/Analysis/TargetLibraryInfo.def | 2 +- llvm/test/Transforms/InstCombine/pr69059.ll | 16 ++++++++++++++++ .../unittests/Analysis/TargetLibraryInfoTest.cpp | 10 ++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/InstCombine/pr69059.ll diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 03ac422d3e6b7..6bd922eed89e1 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1570,7 +1570,7 @@ TLI_DEFINE_SIG_INTERNAL(Int, Int) /// long int labs(long int j); TLI_DEFINE_ENUM_INTERNAL(labs) TLI_DEFINE_STRING_INTERNAL("labs") -TLI_DEFINE_SIG_INTERNAL(Long, Long) +TLI_DEFINE_SIG_INTERNAL(Long, Same) /// int lchown(const char *path, uid_t owner, gid_t group); TLI_DEFINE_ENUM_INTERNAL(lchown) diff --git a/llvm/test/Transforms/InstCombine/pr69059.ll b/llvm/test/Transforms/InstCombine/pr69059.ll new file mode 100644 index 0000000000000..75690b8396520 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr69059.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i64 @pr69059() { +; CHECK-LABEL: define i64 @pr69059() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i64 @labs(i32 0) +; CHECK-NEXT: ret i64 [[CALL]] +; +entry: + %call = call i64 @labs(i32 0) + ret i64 %call +} + +; negative test: not a valid libfunc proto +declare i64 @labs(i32) diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index 8c2328ee1c9be..292b5cade9509 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -69,6 +69,16 @@ TEST_F(TargetLibraryInfoTest, InvalidProto) { M->getOrInsertFunction(TLI.getName(LF), InvalidFTy).getCallee()); 
EXPECT_FALSE(isLibFunc(F, LF)); } + + // i64 @labs(i32) + { + auto *InvalidLabsFTy = FunctionType::get(Type::getInt64Ty(Context), + {Type::getInt32Ty(Context)}, + /*isVarArg=*/false); + auto *F = cast( + M->getOrInsertFunction("labs", InvalidLabsFTy).getCallee()); + EXPECT_FALSE(isLibFunc(F, LibFunc_labs)); + } } // Check that we do accept know-correct prototypes. From eca2fcbdeb328c396d19f7970e94eca40ae79229 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Sun, 15 Oct 2023 12:25:36 +0100 Subject: [PATCH 158/720] [AMDGPU] Fix cost of fast unsafe f32 fdiv (#68988) --- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +++ llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll | 62 +++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 074c8626572b9..cb877a4695f1e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -650,6 +650,15 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost( return LT.first * Cost * NElts; } + if (SLT == MVT::f32 && ((CxtI && CxtI->hasApproxFunc()) || + TLI->getTargetMachine().Options.UnsafeFPMath)) { + // Fast unsafe fdiv lowering: + // f32 rcp + // f32 fmul + int Cost = getQuarterRateInstrCost(CostKind) + getFullRateInstrCost(); + return LT.first * Cost * NElts; + } + if (SLT == MVT::f32 || SLT == MVT::f16) { // 4 more v_cvt_* insts without f16 insts support int Cost = (SLT == MVT::f16 ? 
14 : 10) * getFullRateInstrCost() + diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll index 11ce416b7fd79..2830bfcdaed20 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -43,6 +43,37 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 { ret void } +define amdgpu_kernel void @fdiv_f32_afn_ieee() #0 { +; ALL-LABEL: 'fdiv_f32_afn_ieee' +; ALL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32 = fdiv afn float undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f32 = fdiv afn <2 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v3f32 = fdiv afn <3 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f32 = fdiv afn <4 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v5f32 = fdiv afn <5 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = fdiv afn <8 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v9f32 = fdiv afn <9 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'fdiv_f32_afn_ieee' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = fdiv afn float undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = fdiv afn <2 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = fdiv afn <3 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = fdiv afn <4 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v5f32 = fdiv afn <5 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an 
estimated cost of 24 for instruction: %v8f32 = fdiv afn <8 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v9f32 = fdiv afn <9 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %f32 = fdiv afn float undef, undef + %v2f32 = fdiv afn <2 x float> undef, undef + %v3f32 = fdiv afn <3 x float> undef, undef + %v4f32 = fdiv afn <4 x float> undef, undef + %v5f32 = fdiv afn <5 x float> undef, undef + %v8f32 = fdiv afn <8 x float> undef, undef + %v9f32 = fdiv afn <9 x float> undef, undef + ret void +} + define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 { ; ALL-LABEL: 'fdiv_f32_ftzdaz' ; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f32 = fdiv float undef, undef @@ -74,6 +105,37 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 { ret void } +define amdgpu_kernel void @fdiv_f32_afn_ftzdaz() #1 { +; ALL-LABEL: 'fdiv_f32_afn_ftzdaz' +; ALL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32 = fdiv afn float undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f32 = fdiv afn <2 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v3f32 = fdiv afn <3 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f32 = fdiv afn <4 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v5f32 = fdiv afn <5 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = fdiv afn <8 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v9f32 = fdiv afn <9 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'fdiv_f32_afn_ftzdaz' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = fdiv afn float undef, undef 
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = fdiv afn <2 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = fdiv afn <3 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = fdiv afn <4 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v5f32 = fdiv afn <5 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f32 = fdiv afn <8 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v9f32 = fdiv afn <9 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %f32 = fdiv afn float undef, undef + %v2f32 = fdiv afn <2 x float> undef, undef + %v3f32 = fdiv afn <3 x float> undef, undef + %v4f32 = fdiv afn <4 x float> undef, undef + %v5f32 = fdiv afn <5 x float> undef, undef + %v8f32 = fdiv afn <8 x float> undef, undef + %v9f32 = fdiv afn <9 x float> undef, undef + ret void +} + define amdgpu_kernel void @fdiv_f64() #0 { ; CIFASTF64-LABEL: 'fdiv_f64' ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double undef, undef From 2ef158752a580fd35f78822335bf0366455d5496 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Sun, 15 Oct 2023 09:41:39 -0400 Subject: [PATCH 159/720] Update Clang's Getting Involved page * Downplay cfe-commits as a place where design discussion happens. Instead, call it out as a place for historical information. * Add a link to Discord * Add a link to Office Hours --- clang/www/get_involved.html | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/clang/www/get_involved.html b/clang/www/get_involved.html index b5212ae97372b..3fc688837d0bc 100755 --- a/clang/www/get_involved.html +++ b/clang/www/get_involved.html @@ -32,12 +32,12 @@

Follow what's going on

and has a Discourse forum and mailing list:

    -
  • cfe-commits - - This list is for patch submission/discussion.
  • -
  • Clang Frontend Discourse forum - -This forum is for everything else Clang related (questions and answers, design -discussions, etc).
  • +This forum is for discussions related to Clang (questions and answers, design +discussions, RFCs, etc). + +
  • Discord chat - Real-time chat for +discussions related to Clang (primarily for questions and answers).
  • Regular meetings are held on the @@ -50,16 +50,23 @@

    Follow what's going on

    here
    .
  • +
  • Clang office hours - +People within the community hold dedicated office hours at different points +during the month, which is a great way opportunity for getting questions +answered, having more in-depth design discussions, or learning about what's +going on in the community in general.
  • + +
  • cfe-commits + - Historical record of commits to Clang and contains early community patch +review commentary.
  • +

The most common way to talk with other developers on the project is through the Clang Frontend Discourse forum -. The clang forum is a very friendly place and we welcome -newcomers. In addition to the forum, a significant amount of design -discussion takes place on the cfe-commits mailing -list. All of these lists have archives, so you can browse through previous -discussions or follow the list development on the web if you prefer.

+. The clang forum is a very friendly place and we welcome newcomers. The +forum is archived so you can browse through previous discussions or follow +development on the web if you prefer.

If you're looking for something to work on, check out our Open Projects page or look through the Date: Sun, 15 Oct 2023 22:51:45 +0800 Subject: [PATCH 160/720] [InstCombine] Fold (X << Y) / (X << Z) -> 1 << Y >> Z (#68863) Resolve #68857. Alive2 proofs: [Whole proofs](https://alive2.llvm.org/ce/z/A5b85F) --- .../InstCombine/InstCombineMulDivRem.cpp | 22 ++ llvm/test/Transforms/InstCombine/div-shift.ll | 255 ++++++++++++++++++ 2 files changed, 277 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 560c87b6efa70..26e0a6700042e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -980,6 +980,28 @@ static Instruction *foldIDivShl(BinaryOperator &I, Ret = BinaryOperator::CreateSDiv(X, Y); } + // If X << Y and X << Z does not overflow, then: + // (X << Y) / (X << Z) -> (1 << Y) / (1 << Z) -> 1 << Y >> Z + if (match(Op0, m_Shl(m_Value(X), m_Value(Y))) && + match(Op1, m_Shl(m_Specific(X), m_Value(Z)))) { + auto *Shl0 = cast(Op0); + auto *Shl1 = cast(Op1); + + if (IsSigned ? (Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap()) + : (Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap())) { + Constant *One = ConstantInt::get(X->getType(), 1); + // Only preserve the nsw flag if dividend has nsw + // or divisor has nsw and operator is sdiv. + Value *Dividend = Builder.CreateShl( + One, Y, "shl.dividend", + /*HasNUW*/ true, + /*HasNSW*/ + IsSigned ? 
(Shl0->hasNoUnsignedWrap() || Shl1->hasNoUnsignedWrap()) + : Shl0->hasNoSignedWrap()); + Ret = BinaryOperator::CreateLShr(Dividend, Z); + } + } + if (!Ret) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/div-shift.ll b/llvm/test/Transforms/InstCombine/div-shift.ll index 76c5328dc8499..635c01d84441d 100644 --- a/llvm/test/Transforms/InstCombine/div-shift.ll +++ b/llvm/test/Transforms/InstCombine/div-shift.ll @@ -2,6 +2,7 @@ ; RUN: opt < %s -passes=instcombine -S | FileCheck %s declare void @use(i8) +declare void @use32(i32) declare i8 @llvm.umin.i8(i8, i8) declare i8 @llvm.umax.i8(i8, i8) @@ -1025,3 +1026,257 @@ define i8 @udiv_shl_no_overflow(i8 %x, i8 %y) { %mul = udiv i8 %x, %min ret i8 %mul } + +; (X< 1 << Y >> Z + +define i32 @sdiv_shl_pair_const(i32 %a) { +; CHECK-LABEL: @sdiv_shl_pair_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 2 +; +entry: + %lhs = shl nsw i32 %a, 2 + %rhs = shl nsw i32 %a, 1 + %div = sdiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @udiv_shl_pair_const(i32 %a) { +; CHECK-LABEL: @udiv_shl_pair_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 2 +; +entry: + %lhs = shl nuw i32 %a, 2 + %rhs = shl nuw i32 %a, 1 + %div = udiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @sdiv_shl_pair1(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @sdiv_shl_pair1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SHL_DIVIDEND:%.*]] = shl nuw nsw i32 1, [[X:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL_DIVIDEND]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nsw i32 %a, %x + %rhs = shl nuw nsw i32 %a, %y + %div = sdiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @sdiv_shl_pair2(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @sdiv_shl_pair2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SHL_DIVIDEND:%.*]] = shl nuw nsw i32 1, [[X:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL_DIVIDEND]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nuw nsw i32 %a, %x + %rhs = shl nsw i32 %a, %y + %div = sdiv i32 %lhs, %rhs + ret 
i32 %div +} + +define i32 @sdiv_shl_pair3(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @sdiv_shl_pair3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SHL_DIVIDEND:%.*]] = shl nuw i32 1, [[X:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL_DIVIDEND]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nsw i32 %a, %x + %rhs = shl nsw i32 %a, %y + %div = sdiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @sdiv_shl_no_pair_fail(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-LABEL: @sdiv_shl_no_pair_fail( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LHS:%.*]] = shl nuw nsw i32 [[A:%.*]], [[X:%.*]] +; CHECK-NEXT: [[RHS:%.*]] = shl nuw i32 [[B:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nuw nsw i32 %a, %x + %rhs = shl nuw i32 %b, %y + %div = sdiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @udiv_shl_pair1(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @udiv_shl_pair1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SHL_DIVIDEND:%.*]] = shl nuw i32 1, [[X:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL_DIVIDEND]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nuw i32 %a, %x + %rhs = shl nuw i32 %a, %y + %div = udiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @udiv_shl_pair2(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @udiv_shl_pair2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SHL_DIVIDEND:%.*]] = shl nuw nsw i32 1, [[X:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL_DIVIDEND]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nuw nsw i32 %a, %x + %rhs = shl nuw i32 %a, %y + %div = udiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @udiv_shl_pair3(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @udiv_shl_pair3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SHL_DIVIDEND:%.*]] = shl nuw i32 1, [[X:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL_DIVIDEND]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nuw i32 %a, %x + %rhs = shl nuw nsw i32 %a, %y + %div = udiv i32 
%lhs, %rhs + ret i32 %div +} + +define i32 @sdiv_shl_pair_overflow_fail1(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @sdiv_shl_pair_overflow_fail1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LHS:%.*]] = shl i32 [[A:%.*]], [[X:%.*]] +; CHECK-NEXT: [[RHS:%.*]] = shl nsw i32 [[A]], [[Y:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl i32 %a, %x + %rhs = shl nsw i32 %a, %y + %div = sdiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @sdiv_shl_pair_overflow_fail2(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @sdiv_shl_pair_overflow_fail2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LHS:%.*]] = shl nsw i32 [[A:%.*]], [[X:%.*]] +; CHECK-NEXT: [[RHS:%.*]] = shl nuw i32 [[A]], [[Y:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nsw i32 %a, %x + %rhs = shl nuw i32 %a, %y + %div = sdiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @udiv_shl_pair_overflow_fail1(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @udiv_shl_pair_overflow_fail1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LHS:%.*]] = shl nsw i32 [[A:%.*]], [[X:%.*]] +; CHECK-NEXT: [[RHS:%.*]] = shl nuw i32 [[A]], [[Y:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nsw i32 %a, %x + %rhs = shl nuw i32 %a, %y + %div = udiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @udiv_shl_pair_overflow_fail2(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @udiv_shl_pair_overflow_fail2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LHS:%.*]] = shl nsw i32 [[A:%.*]], [[X:%.*]] +; CHECK-NEXT: [[RHS:%.*]] = shl i32 [[A]], [[Y:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nsw i32 %a, %x + %rhs = shl i32 %a, %y + %div = udiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @udiv_shl_pair_overflow_fail3(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @udiv_shl_pair_overflow_fail3( +; CHECK-NEXT: entry: +; 
CHECK-NEXT: [[LHS:%.*]] = shl nuw nsw i32 [[A:%.*]], [[X:%.*]] +; CHECK-NEXT: [[RHS:%.*]] = shl i32 [[A]], [[Y:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nuw nsw i32 %a, %x + %rhs = shl i32 %a, %y + %div = udiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @sdiv_shl_pair_multiuse1(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @sdiv_shl_pair_multiuse1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LHS:%.*]] = shl nuw nsw i32 [[A:%.*]], [[X:%.*]] +; CHECK-NEXT: call void @use32(i32 [[LHS]]) +; CHECK-NEXT: [[SHL_DIVIDEND:%.*]] = shl nuw nsw i32 1, [[X]] +; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL_DIVIDEND]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nuw nsw i32 %a, %x + call void @use32(i32 %lhs) + %rhs = shl nsw i32 %a, %y + %div = sdiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @sdiv_shl_pair_multiuse2(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @sdiv_shl_pair_multiuse2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RHS:%.*]] = shl nsw i32 [[A:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use32(i32 [[RHS]]) +; CHECK-NEXT: [[SHL_DIVIDEND:%.*]] = shl nuw nsw i32 1, [[X:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL_DIVIDEND]], [[Y]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nuw nsw i32 %a, %x + %rhs = shl nsw i32 %a, %y + call void @use32(i32 %rhs) + %div = sdiv i32 %lhs, %rhs + ret i32 %div +} + +define i32 @sdiv_shl_pair_multiuse3(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: @sdiv_shl_pair_multiuse3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LHS:%.*]] = shl nuw nsw i32 [[A:%.*]], [[X:%.*]] +; CHECK-NEXT: [[RHS:%.*]] = shl nsw i32 [[A]], [[Y:%.*]] +; CHECK-NEXT: call void @use32(i32 [[LHS]]) +; CHECK-NEXT: call void @use32(i32 [[RHS]]) +; CHECK-NEXT: [[SHL_DIVIDEND:%.*]] = shl nuw nsw i32 1, [[X]] +; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL_DIVIDEND]], [[Y]] +; CHECK-NEXT: ret i32 [[DIV]] +; +entry: + %lhs = shl nuw nsw i32 %a, %x + %rhs = shl nsw i32 %a, %y + call void @use32(i32 
%lhs) + call void @use32(i32 %rhs) + %div = sdiv i32 %lhs, %rhs + ret i32 %div +} From 6dfea561ba96974b205c31546c5e2069429c75b1 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Sun, 15 Oct 2023 16:14:55 +0100 Subject: [PATCH 161/720] [builtins] Start to refactor int to fp conversion functions to use a common implementation (#66903) After this patch, the softfp implementations of floatdidf and floatundidf use a common implementation (int_to_fp.h and int_to_fp_impl.inc). This roughly follows the pattern used for a wide range of other builtins, e.g. fp_trunc_impl.inc. Currently there is substantial copy and paste for the various int to fp conversion functions, with just a few constants being changed. This is a barrier to maintainability, and it's also not attractive to copy this approach as we introduce additional int to fp conversion functions for bf16 and half (which we currently lack, but need - see ). I've opted to conservatively start by replacing just two functions, leaving a follow-up patch to replace others that follow the same pattern. Also, for better or worse I've left the logic in float[un]didf largely unchanged other than using a similar approach to fp_trunc_impl.inc to remove the constants that are tied to a specific output floating point format. 
--- compiler-rt/lib/builtins/floatdidf.c | 52 ++-------------- compiler-rt/lib/builtins/floatundidf.c | 49 ++------------- compiler-rt/lib/builtins/int_to_fp.h | 51 +++++++++++++++ compiler-rt/lib/builtins/int_to_fp_impl.inc | 69 +++++++++++++++++++++ 4 files changed, 130 insertions(+), 91 deletions(-) create mode 100644 compiler-rt/lib/builtins/int_to_fp.h create mode 100644 compiler-rt/lib/builtins/int_to_fp_impl.inc diff --git a/compiler-rt/lib/builtins/floatdidf.c b/compiler-rt/lib/builtins/floatdidf.c index c994aad3f079e..6da81f7a05bf2 100644 --- a/compiler-rt/lib/builtins/floatdidf.c +++ b/compiler-rt/lib/builtins/floatdidf.c @@ -45,53 +45,11 @@ COMPILER_RT_ABI double __floatdidf(di_int a) { // flags to set, and we don't want to code-gen to an unknown soft-float // implementation. -COMPILER_RT_ABI double __floatdidf(di_int a) { - if (a == 0) - return 0.0; - const unsigned N = sizeof(di_int) * CHAR_BIT; - const di_int s = a >> (N - 1); - a = (du_int)(a ^ s) - s; - int sd = N - __builtin_clzll(a); // number of significant digits - int e = sd - 1; // exponent - if (sd > DBL_MANT_DIG) { - // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - // 12345678901234567890123456 - // 1 = msb 1 bit - // P = bit DBL_MANT_DIG-1 bits to the right of 1 - // Q = bit DBL_MANT_DIG bits to the right of 1 - // R = "or" of all bits to the right of Q - switch (sd) { - case DBL_MANT_DIG + 1: - a <<= 1; - break; - case DBL_MANT_DIG + 2: - break; - default: - a = ((du_int)a >> (sd - (DBL_MANT_DIG + 2))) | - ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); - }; - // finish: - a |= (a & 4) != 0; // Or P into R - ++a; // round - this step may add a significant bit - a >>= 2; // dump Q and R - // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits - if (a & ((du_int)1 << DBL_MANT_DIG)) { - a >>= 1; - ++e; - } - // a is now rounded to DBL_MANT_DIG bits - } else { - a <<= 
(DBL_MANT_DIG - sd); - // a is now rounded to DBL_MANT_DIG bits - } - double_bits fb; - fb.u.s.high = ((su_int)s & 0x80000000) | // sign - ((su_int)(e + 1023) << 20) | // exponent - ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high - fb.u.s.low = (su_int)a; // mantissa-low - return fb.f; -} +#define SRC_I64 +#define DST_DOUBLE +#include "int_to_fp_impl.inc" + +COMPILER_RT_ABI double __floatdidf(di_int a) { return __floatXiYf__(a); } #endif #if defined(__ARM_EABI__) diff --git a/compiler-rt/lib/builtins/floatundidf.c b/compiler-rt/lib/builtins/floatundidf.c index 2ec802cdc134f..9743e96ec6791 100644 --- a/compiler-rt/lib/builtins/floatundidf.c +++ b/compiler-rt/lib/builtins/floatundidf.c @@ -51,50 +51,11 @@ COMPILER_RT_ABI double __floatundidf(du_int a) { // flags to set, and we don't want to code-gen to an unknown soft-float // implementation. -COMPILER_RT_ABI double __floatundidf(du_int a) { - if (a == 0) - return 0.0; - const unsigned N = sizeof(du_int) * CHAR_BIT; - int sd = N - __builtin_clzll(a); // number of significant digits - int e = sd - 1; // exponent - if (sd > DBL_MANT_DIG) { - // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - // 12345678901234567890123456 - // 1 = msb 1 bit - // P = bit DBL_MANT_DIG-1 bits to the right of 1 - // Q = bit DBL_MANT_DIG bits to the right of 1 - // R = "or" of all bits to the right of Q - switch (sd) { - case DBL_MANT_DIG + 1: - a <<= 1; - break; - case DBL_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (DBL_MANT_DIG + 2))) | - ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); - }; - // finish: - a |= (a & 4) != 0; // Or P into R - ++a; // round - this step may add a significant bit - a >>= 2; // dump Q and R - // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits - if (a & ((du_int)1 << DBL_MANT_DIG)) { - a >>= 1; - ++e; - } - // a is now rounded to DBL_MANT_DIG bits - } else { - a <<= 
(DBL_MANT_DIG - sd); - // a is now rounded to DBL_MANT_DIG bits - } - double_bits fb; - fb.u.s.high = ((su_int)(e + 1023) << 20) | // exponent - ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high - fb.u.s.low = (su_int)a; // mantissa-low - return fb.f; -} +#define SRC_U64 +#define DST_DOUBLE +#include "int_to_fp_impl.inc" + +COMPILER_RT_ABI double __floatundidf(du_int a) { return __floatXiYf__(a); } #endif #if defined(__ARM_EABI__) diff --git a/compiler-rt/lib/builtins/int_to_fp.h b/compiler-rt/lib/builtins/int_to_fp.h new file mode 100644 index 0000000000000..dbab5130fb39e --- /dev/null +++ b/compiler-rt/lib/builtins/int_to_fp.h @@ -0,0 +1,51 @@ +//===-- int_to_fp.h - integer to floating point conversion ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Set source and destination defines in order to use a correctly +// parameterised floatXiYf implementation. +// +//===----------------------------------------------------------------------===// + +#ifndef INT_TO_FP_H +#define INT_TO_FP_H + +#include "int_lib.h" + +#if defined SRC_I64 +typedef int64_t src_t; +typedef uint64_t usrc_t; +static __inline int clzSrcT(usrc_t x) { return __builtin_clzll(x); } + +#elif defined SRC_U64 +typedef uint64_t src_t; +typedef uint64_t usrc_t; +static __inline int clzSrcT(usrc_t x) { return __builtin_clzll(x); } + +#else +#error Source should be a handled integer type. 
+#endif + +#if defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C +static const int dstSigBits = 52; + +#else +#error Destination should be a handled floating point type +#endif + +static __inline dst_t dstFromRep(dst_rep_t x) { + const union { + dst_t f; + dst_rep_t i; + } rep = {.i = x}; + return rep.f; +} + +#endif // INT_TO_FP_H diff --git a/compiler-rt/lib/builtins/int_to_fp_impl.inc b/compiler-rt/lib/builtins/int_to_fp_impl.inc new file mode 100644 index 0000000000000..c49f2c9607ec1 --- /dev/null +++ b/compiler-rt/lib/builtins/int_to_fp_impl.inc @@ -0,0 +1,69 @@ +//===-- int_to_fp_impl.inc - integer to floating point conversion ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a generic conversion from an integer type to an +// IEEE-754 floating point type, allowing a common implementation to be shared +// without copy and paste. +// +//===----------------------------------------------------------------------===// + +#include "int_to_fp.h" + +static __inline dst_t __floatXiYf__(src_t a) { + if (a == 0) + return 0.0; + const int dstMantDig = dstSigBits + 1; + const int srcBits = sizeof(src_t) * CHAR_BIT; + const int srcIsSigned = ((src_t)-1) < 0; + const src_t s = srcIsSigned ? 
a >> (srcBits - 1) : 0; + a = (usrc_t)(a ^ s) - s; + int sd = srcBits - clzSrcT(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > dstMantDig) { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit dstMantDig-1 bits to the right of 1 + // Q = bit dstMantDig bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) { + case dstMantDig + 1: + a <<= 1; + break; + case dstMantDig + 2: + break; + default: + a = ((usrc_t)a >> (sd - (dstMantDig + 2))) | + ((a & ((usrc_t)(-1) >> ((srcBits + dstMantDig + 2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to dstMantDig or dstMantDig+1 bits + if (a & ((usrc_t)1 << dstMantDig)) { + a >>= 1; + ++e; + } + // a is now rounded to dstMantDig bits + } else { + a <<= (dstMantDig - sd); + // a is now rounded to dstMantDig bits + } + const int dstBits = sizeof(dst_t) * CHAR_BIT; + const dst_rep_t dstSignMask = DST_REP_C(1) << (dstBits - 1); + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstExpBias = (1 << (dstExpBits - 1)) - 1; + const dst_rep_t dstSignificandMask = (DST_REP_C(1) << dstSigBits) - 1; + // Combine sign, exponent, and mantissa. + const dst_rep_t result = ((dst_rep_t)s & dstSignMask) | + ((dst_rep_t)(e + dstExpBias) << dstSigBits) | + ((dst_rep_t)(a) & dstSignificandMask); + return dstFromRep(result); +} From d5e91ca5633e630f97777d96103aba3fa1247080 Mon Sep 17 00:00:00 2001 From: George Lyon Date: Sun, 15 Oct 2023 09:59:00 -0700 Subject: [PATCH 162/720] [CMake] Limit -gsplit-dwarf option to C and C++ compilers Currently, If the C or C++ compiler supports the `-gsplit-dwarf` option it is added to _all_ compilers. 
If a project decides to use another language, such as Swift, this option will be sent to that compiler as well, regardless whether that compiler supports it or not (Swift doesnot). This patch uses [generator expressions](https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html) to limit the `-gsplit-dwarf` option to only those compilers that support it (C and C++). --- llvm/cmake/modules/HandleLLVMOptions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index c5142c9e660fb..19cb881adc3fa 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -1007,7 +1007,7 @@ if (LLVM_USE_SPLIT_DWARF AND # Limit to clang and gcc so far. Add compilers supporting this option. if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - add_compile_options(-gsplit-dwarf) + add_compile_options($<$:-gsplit-dwarf>) include(LLVMCheckLinkerFlag) llvm_check_linker_flag(CXX "-Wl,--gdb-index" LINKER_SUPPORTS_GDB_INDEX) append_if(LINKER_SUPPORTS_GDB_INDEX "-Wl,--gdb-index" From 4698b9926221ca388a462ccd5c363d9f8f6b9128 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 16 Oct 2023 01:48:39 +0800 Subject: [PATCH 163/720] [BasicAA] Add pre-commit tests for PR69096. NFC. --- llvm/test/Analysis/BasicAA/pr69096.ll | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 llvm/test/Analysis/BasicAA/pr69096.ll diff --git a/llvm/test/Analysis/BasicAA/pr69096.ll b/llvm/test/Analysis/BasicAA/pr69096.ll new file mode 100644 index 0000000000000..7d8506b81c2bf --- /dev/null +++ b/llvm/test/Analysis/BasicAA/pr69096.ll @@ -0,0 +1,31 @@ +; RUN: opt %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +target datalayout = "p:64:64:64" + +; CHECK-LABEL: Function: pr69096 +; FIXME: This should be MayAlias. 
%p == %scevgep.i when %a == -1. +; CHECK: NoAlias: i8* %p, i16* %scevgep.i + +define i32 @pr69096(i16 %a, ptr %p) { +entry: + %0 = load i8, ptr %p, align 2 + %dec.i = add i8 %0, -1 + %cmp636.i = icmp eq i16 %a, -1 + br i1 %cmp636.i, label %for.cond2.for.inc29_crit_edge.i, label %n.exit + +for.cond2.for.inc29_crit_edge.i: + %conv3.i = zext i16 %a to i64 + %sub.i.i = shl i64 %conv3.i, 56 + %sub21.i = shl nuw nsw i64 %conv3.i, 2 + %1 = getelementptr i8, ptr %p, i64 %sub21.i + %2 = getelementptr i8, ptr %1, i64 -262140 + %3 = getelementptr i8, ptr %2, i64 %sub.i.i + %scevgep.i = getelementptr i8, ptr %3, i64 72057594037927936 + store i16 1285, ptr %scevgep.i, align 2 + br label %n.exit + +n.exit: + %4 = load i8, ptr %p, align 2 + %conv = sext i8 %4 to i32 + ret i32 %conv +} From 546c3d792addc24bcc44382dd83939c50924c909 Mon Sep 17 00:00:00 2001 From: Shraiysh Date: Sun, 15 Oct 2023 13:17:46 -0500 Subject: [PATCH 164/720] [OpenMP][mlir] Added `num_teams`, `thread_limit` translation to LLVM IR (#68821) This patch adds translation to LLVM IR for `num_teams` and `thread_limit` in for `omp.teams` operation. 
--- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 22 +++- mlir/test/Target/LLVMIR/openmp-teams.mlir | 111 ++++++++++++++++++ 2 files changed, 127 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 208c3d690e553..e3dc68a1b8b7d 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -666,11 +666,9 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; LogicalResult bodyGenStatus = success(); - if (op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || - op.getThreadLimit() || !op.getAllocatorsVars().empty() || - op.getReductions()) { + if (op.getIfExpr() || !op.getAllocatorsVars().empty() || op.getReductions()) return op.emitError("unhandled clauses for translation to LLVM IR"); - } + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { LLVM::ModuleTranslation::SaveStack frame( moduleTranslation, allocaIP); @@ -679,9 +677,21 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, moduleTranslation, bodyGenStatus); }; + llvm::Value *numTeamsLower = nullptr; + if (Value numTeamsLowerVar = op.getNumTeamsLower()) + numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar); + + llvm::Value *numTeamsUpper = nullptr; + if (Value numTeamsUpperVar = op.getNumTeamsUpper()) + numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar); + + llvm::Value *threadLimit = nullptr; + if (Value threadLimitVar = op.getThreadLimit()) + threadLimit = moduleTranslation.lookupValue(threadLimitVar); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP( - moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); + 
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams( + ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit)); return bodyGenStatus; } diff --git a/mlir/test/Target/LLVMIR/openmp-teams.mlir b/mlir/test/Target/LLVMIR/openmp-teams.mlir index 18fc2bb5a3c61..87ef90223ed70 100644 --- a/mlir/test/Target/LLVMIR/openmp-teams.mlir +++ b/mlir/test/Target/LLVMIR/openmp-teams.mlir @@ -124,3 +124,114 @@ llvm.func @omp_teams_branching_shared(%condition: i1, %arg0: i32, %arg1: f32, %a // CHECK-NEXT: br label // CHECK: ret void +// ----- + +llvm.func @beforeTeams() +llvm.func @duringTeams() +llvm.func @afterTeams() + +// CHECK-LABEL: @omp_teams_thread_limit +// CHECK-SAME: (i32 [[THREAD_LIMIT:.+]]) +llvm.func @omp_teams_thread_limit(%threadLimit: i32) { + // CHECK-NEXT: call void @beforeTeams() + llvm.call @beforeTeams() : () -> () + // CHECK: [[THREAD_NUM:%.+]] = call i32 @__kmpc_global_thread_num + // CHECK-NEXT: call void @__kmpc_push_num_teams_51({{.+}}, i32 [[THREAD_NUM]], i32 0, i32 0, i32 [[THREAD_LIMIT]]) + // CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @1, i32 0, ptr [[OUTLINED_FN:.+]]) + omp.teams thread_limit(%threadLimit : i32) { + llvm.call @duringTeams() : () -> () + omp.terminator + } + // CHECK: call void @afterTeams + llvm.call @afterTeams() : () -> () + // CHECK: ret void + llvm.return +} + +// CHECK: define internal void [[OUTLINED_FN]](ptr {{.+}}, ptr {{.+}}) +// CHECK: call void @duringTeams() +// CHECK: ret void + +// ----- + +llvm.func @beforeTeams() +llvm.func @duringTeams() +llvm.func @afterTeams() + +// CHECK-LABEL: @omp_teams_num_teams_upper +// CHECK-SAME: (i32 [[NUM_TEAMS_UPPER:.+]]) +llvm.func @omp_teams_num_teams_upper(%numTeamsUpper: i32) { + // CHECK-NEXT: call void @beforeTeams() + llvm.call @beforeTeams() : () -> () + // CHECK: [[THREAD_NUM:%.+]] = call i32 @__kmpc_global_thread_num + // CHECK-NEXT: call void @__kmpc_push_num_teams_51({{.+}}, i32 [[THREAD_NUM]], i32 [[NUM_TEAMS_UPPER]], i32 [[NUM_TEAMS_UPPER]], i32 0) + // CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @1, i32 0, ptr [[OUTLINED_FN:.+]]) + omp.teams num_teams(to %numTeamsUpper : i32) { + llvm.call @duringTeams() : () -> () + omp.terminator + } + // CHECK: call void @afterTeams + llvm.call @afterTeams() : () -> () + // CHECK: ret void + llvm.return +} + +// CHECK: define internal void [[OUTLINED_FN]](ptr {{.+}}, ptr {{.+}}) +// CHECK: call void @duringTeams() +// CHECK: ret void + +// ----- + +llvm.func @beforeTeams() +llvm.func @duringTeams() +llvm.func @afterTeams() + +// CHECK-LABEL: @omp_teams_num_teams_lower_and_upper +// CHECK-SAME: (i32 [[NUM_TEAMS_LOWER:.+]], i32 [[NUM_TEAMS_UPPER:.+]]) +llvm.func @omp_teams_num_teams_lower_and_upper(%numTeamsLower: i32, %numTeamsUpper: i32) { + // CHECK-NEXT: call void @beforeTeams() + llvm.call @beforeTeams() : () -> () + // CHECK: [[THREAD_NUM:%.+]] = call i32 @__kmpc_global_thread_num + // CHECK-NEXT: call void @__kmpc_push_num_teams_51({{.+}}, i32 [[THREAD_NUM]], i32 [[NUM_TEAMS_LOWER]], i32 [[NUM_TEAMS_UPPER]], i32 0) + // CHECK: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @1, i32 0, ptr [[OUTLINED_FN:.+]]) + omp.teams num_teams(%numTeamsLower : i32 to %numTeamsUpper: i32) { + llvm.call @duringTeams() : () -> () + omp.terminator + } + // CHECK: call void @afterTeams + llvm.call @afterTeams() : () -> () + // CHECK: ret void + llvm.return +} + +// CHECK: define internal void [[OUTLINED_FN]](ptr {{.+}}, ptr {{.+}}) +// CHECK: call void @duringTeams() +// CHECK: ret void + +// ----- + +llvm.func @beforeTeams() +llvm.func @duringTeams() +llvm.func @afterTeams() + +// CHECK-LABEL: @omp_teams_num_teams_and_thread_limit +// CHECK-SAME: (i32 [[NUM_TEAMS_LOWER:.+]], i32 [[NUM_TEAMS_UPPER:.+]], i32 [[THREAD_LIMIT:.+]]) +llvm.func @omp_teams_num_teams_and_thread_limit(%numTeamsLower: i32, %numTeamsUpper: i32, %threadLimit: i32) { + // CHECK-NEXT: call void @beforeTeams() + llvm.call @beforeTeams() : () -> () + // CHECK: [[THREAD_NUM:%.+]] = call i32 @__kmpc_global_thread_num + // CHECK-NEXT: call void @__kmpc_push_num_teams_51({{.+}}, i32 [[THREAD_NUM]], i32 [[NUM_TEAMS_LOWER]], i32 [[NUM_TEAMS_UPPER]], i32 [[THREAD_LIMIT]]) + // CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @1, i32 0, ptr [[OUTLINED_FN:.+]]) + omp.teams num_teams(%numTeamsLower : i32 to %numTeamsUpper: i32) thread_limit(%threadLimit: i32) { + llvm.call @duringTeams() : () -> () + omp.terminator + } + // CHECK: call void @afterTeams + llvm.call @afterTeams() : () -> () + // CHECK: ret void + llvm.return +} + +// CHECK: define internal void [[OUTLINED_FN]](ptr {{.+}}, ptr {{.+}}) +// CHECK: call void @duringTeams() +// CHECK: ret void From 017b9c03d60676843438ffa53e77ea307303c848 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 15 Oct 2023 13:20:45 -0700 Subject: [PATCH 165/720] [CodeGen] Remove unused declaration RegisterDefIsDead The corresponding function definition was removed by: commit 6325446666b4c76f399b3974f8ef1b5092624e2a Author: Evan Cheng Date: Wed Mar 5 00:59:57 2008 +0000 --- llvm/include/llvm/CodeGen/LiveVariables.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index a1ed3c073251b..90aeb8ceda559 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -182,10 +182,6 @@ class LiveVariables : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; - /// RegisterDefIsDead - Return true if the specified instruction defines the - /// specified register, but that definition is dead. 
- bool RegisterDefIsDead(MachineInstr &MI, Register Reg) const; - //===--------------------------------------------------------------------===// // API to update live variable information From 196108857d228e1997bc684d448c12b56e794459 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 15 Oct 2023 13:20:46 -0700 Subject: [PATCH 166/720] [GlobalISel] Remove unused declaration applyCombineAnyExtTrunc The corresponding function definition was removed by: commit a6be26710bbdf8de39a16ad64526ec955dda6c59 Author: Jay Foad Date: Tue Feb 23 16:10:19 2021 +0000 --- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index d8f19c19ee106..30b04930dfb96 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -378,7 +378,6 @@ class CombinerHelper { /// Transform anyext(trunc(x)) to x. bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg); - void applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg); /// Transform zext(trunc(x)) to x. 
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg); From 96196e25fd49e3d0ecec9550e81365ce122679cb Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 15 Oct 2023 13:20:48 -0700 Subject: [PATCH 167/720] [GlobalISel] Remove unused declaration tryCombineIndexedLoadStore The corresponding function definition was removed by: commit 7e5c2672cb4ef5a607414023805b8040b8e1fa99 Author: Amara Emerson Date: Mon Sep 25 03:22:25 2023 +0800 --- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 30b04930dfb96..d64b414f27476 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -194,9 +194,6 @@ class CombinerHelper { /// Match (and (load x), mask) -> zextload x bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo); - /// Combine \p MI into a pre-indexed or post-indexed load/store operation if - /// legal and the surrounding code makes it useful. - bool tryCombineIndexedLoadStore(MachineInstr &MI); bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); From ea4cc2007efeaf14b8a07b967cb0c570e5b59d7c Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 16 Oct 2023 04:40:10 +0800 Subject: [PATCH 168/720] [BasicAA] Remove NSW flags when merging scales (#69122) When merging scales of `LinearExpression` that have common index variables, we cannot guarantee the NSW flag still applies to the merged expression. Fixes #69096. 
--- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 1 + llvm/test/Analysis/BasicAA/pr69096.ll | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index c162b8f6edc19..ca65abeb591c5 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -662,6 +662,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, if (Decomposed.VarIndices[i].Val.V == LE.Val.V && Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) { Scale += Decomposed.VarIndices[i].Scale; + LE.IsNSW = false; // We cannot guarantee nsw for the merge. Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i); break; } diff --git a/llvm/test/Analysis/BasicAA/pr69096.ll b/llvm/test/Analysis/BasicAA/pr69096.ll index 7d8506b81c2bf..fe8504a0308ad 100644 --- a/llvm/test/Analysis/BasicAA/pr69096.ll +++ b/llvm/test/Analysis/BasicAA/pr69096.ll @@ -3,8 +3,8 @@ target datalayout = "p:64:64:64" ; CHECK-LABEL: Function: pr69096 -; FIXME: This should be MayAlias. %p == %scevgep.i when %a == -1. -; CHECK: NoAlias: i8* %p, i16* %scevgep.i +; %p == %scevgep.i when %a == -1. +; CHECK: MayAlias: i8* %p, i16* %scevgep.i define i32 @pr69096(i16 %a, ptr %p) { entry: From 19505072123e43eccf528b660973067b5c9b4a26 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Sat, 14 Oct 2023 20:56:06 -0700 Subject: [PATCH 169/720] Revert "Re-apply '[AArch64] Enable "sink-and-fold" in MachineSink by default (#67432)'" This reverts commit dbb9faedec5e28ab3f584f5e14d31e475ac268ac. This seems to cause miscompiles on CTMark/sqlite3 and others with GISel. 
--- .../Target/AArch64/AArch64TargetMachine.cpp | 2 +- .../CodeGen/AArch64/arm64-indexed-memory.ll | 230 +++++++++++++----- .../machine-sink-cache-invalidation.ll | 3 +- llvm/test/CodeGen/AArch64/sink-and-fold.ll | 2 +- 4 files changed, 178 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index fcc30a7cfceaf..3d818c76bd4b7 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -200,7 +200,7 @@ static cl::opt EnableGISelLoadStoreOptPostLegal( static cl::opt EnableSinkFold("aarch64-enable-sink-fold", cl::desc("Enable sinking and folding of instruction copies"), - cl::init(true), cl::Hidden); + cl::init(false), cl::Hidden); extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() { // Register the target. diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll index bb18d6d4866ca..87e5602847612 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -727,11 +727,25 @@ define ptr @pretrunc64to8(ptr %ptr, i64 %spacing) { ; Pre-indexed loads ;----- define ptr @preidxf64(ptr %src, ptr %out) { -; CHECK-LABEL: preidxf64: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8]! -; CHECK-NEXT: str d0, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidxf64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldr d0, [x0, #8]! +; CHECK64-NEXT: str d0, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidxf64: +; GISEL: ; %bb.0: +; GISEL-NEXT: add x8, x0, #8 +; GISEL-NEXT: ldr d0, [x0, #8] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str d0, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidxf64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldr d0, [x0, #8]! 
+; CHECK32-NEXT: str d0, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds double, ptr %src, i64 1 %tmp = load double, ptr %ptr, align 4 store double %tmp, ptr %out, align 4 @@ -739,11 +753,25 @@ define ptr @preidxf64(ptr %src, ptr %out) { } define ptr @preidxf32(ptr %src, ptr %out) { -; CHECK-LABEL: preidxf32: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr s0, [x0, #4]! -; CHECK-NEXT: str s0, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidxf32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldr s0, [x0, #4]! +; CHECK64-NEXT: str s0, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidxf32: +; GISEL: ; %bb.0: +; GISEL-NEXT: add x8, x0, #4 +; GISEL-NEXT: ldr s0, [x0, #4] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str s0, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidxf32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldr s0, [x0, #4]! +; CHECK32-NEXT: str s0, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds float, ptr %src, i64 1 %tmp = load float, ptr %ptr, align 4 store float %tmp, ptr %out, align 4 @@ -759,8 +787,9 @@ define ptr @preidxf16(ptr %src, ptr %out) { ; ; GISEL-LABEL: preidxf16: ; GISEL: ; %bb.0: +; GISEL-NEXT: add x8, x0, #2 ; GISEL-NEXT: ldr h0, [x0, #2] -; GISEL-NEXT: add x0, x0, #2 +; GISEL-NEXT: mov x0, x8 ; GISEL-NEXT: str h0, [x1] ; GISEL-NEXT: ret ; @@ -776,11 +805,25 @@ define ptr @preidxf16(ptr %src, ptr %out) { } define ptr @preidx64(ptr %src, ptr %out) { -; CHECK-LABEL: preidx64: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr x8, [x0, #8]! -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldr x8, [x0, #8]! +; CHECK64-NEXT: str x8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx64: +; GISEL: ; %bb.0: +; GISEL-NEXT: add x8, x0, #8 +; GISEL-NEXT: ldr x9, [x0, #8] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str x9, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldr x8, [x0, #8]! 
+; CHECK32-NEXT: str x8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i64, ptr %src, i64 1 %tmp = load i64, ptr %ptr, align 4 store i64 %tmp, ptr %out, align 4 @@ -788,11 +831,25 @@ define ptr @preidx64(ptr %src, ptr %out) { } define ptr @preidx32(ptr %src, ptr %out) { -; CHECK-LABEL: preidx32: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldr w8, [x0, #4]! -; CHECK-NEXT: str w8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldr w8, [x0, #4]! +; CHECK64-NEXT: str w8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx32: +; GISEL: ; %bb.0: +; GISEL-NEXT: add x8, x0, #4 +; GISEL-NEXT: ldr w9, [x0, #4] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str w9, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldr w8, [x0, #4]! +; CHECK32-NEXT: str w8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i32, ptr %src, i64 1 %tmp = load i32, ptr %ptr, align 4 store i32 %tmp, ptr %out, align 4 @@ -800,11 +857,25 @@ define ptr @preidx32(ptr %src, ptr %out) { } define ptr @preidx16zext32(ptr %src, ptr %out) { -; CHECK-LABEL: preidx16zext32: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldrh w8, [x0, #2]! -; CHECK-NEXT: str w8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx16zext32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldrh w8, [x0, #2]! +; CHECK64-NEXT: str w8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx16zext32: +; GISEL: ; %bb.0: +; GISEL-NEXT: add x8, x0, #2 +; GISEL-NEXT: ldrh w9, [x0, #2] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str w9, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx16zext32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldrh w8, [x0, #2]! 
+; CHECK32-NEXT: str w8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i16, ptr %src, i64 1 %tmp = load i16, ptr %ptr, align 4 %ext = zext i16 %tmp to i32 @@ -813,11 +884,25 @@ define ptr @preidx16zext32(ptr %src, ptr %out) { } define ptr @preidx16zext64(ptr %src, ptr %out) { -; CHECK-LABEL: preidx16zext64: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldrh w8, [x0, #2]! -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx16zext64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldrh w8, [x0, #2]! +; CHECK64-NEXT: str x8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx16zext64: +; GISEL: ; %bb.0: +; GISEL-NEXT: add x8, x0, #2 +; GISEL-NEXT: ldrh w9, [x0, #2] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str x9, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx16zext64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldrh w8, [x0, #2]! +; CHECK32-NEXT: str x8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i16, ptr %src, i64 1 %tmp = load i16, ptr %ptr, align 4 %ext = zext i16 %tmp to i64 @@ -826,11 +911,25 @@ define ptr @preidx16zext64(ptr %src, ptr %out) { } define ptr @preidx8zext32(ptr %src, ptr %out) { -; CHECK-LABEL: preidx8zext32: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldrb w8, [x0, #1]! -; CHECK-NEXT: str w8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx8zext32: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldrb w8, [x0, #1]! +; CHECK64-NEXT: str w8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx8zext32: +; GISEL: ; %bb.0: +; GISEL-NEXT: add x8, x0, #1 +; GISEL-NEXT: ldrb w9, [x0, #1] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str w9, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx8zext32: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldrb w8, [x0, #1]! 
+; CHECK32-NEXT: str w8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i8, ptr %src, i64 1 %tmp = load i8, ptr %ptr, align 4 %ext = zext i8 %tmp to i32 @@ -839,11 +938,25 @@ define ptr @preidx8zext32(ptr %src, ptr %out) { } define ptr @preidx8zext64(ptr %src, ptr %out) { -; CHECK-LABEL: preidx8zext64: -; CHECK: ; %bb.0: -; CHECK-NEXT: ldrb w8, [x0, #1]! -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: ret +; CHECK64-LABEL: preidx8zext64: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldrb w8, [x0, #1]! +; CHECK64-NEXT: str x8, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: preidx8zext64: +; GISEL: ; %bb.0: +; GISEL-NEXT: add x8, x0, #1 +; GISEL-NEXT: ldrb w9, [x0, #1] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str x9, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: preidx8zext64: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldrb w8, [x0, #1]! +; CHECK32-NEXT: str x8, [x1] +; CHECK32-NEXT: ret %ptr = getelementptr inbounds i8, ptr %src, i64 1 %tmp = load i8, ptr %ptr, align 4 %ext = zext i8 %tmp to i64 @@ -860,9 +973,10 @@ define ptr @preidx32sext64(ptr %src, ptr %out) { ; ; GISEL-LABEL: preidx32sext64: ; GISEL: ; %bb.0: -; GISEL-NEXT: ldrsw x8, [x0, #4] -; GISEL-NEXT: add x0, x0, #4 -; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: add x8, x0, #4 +; GISEL-NEXT: ldrsw x9, [x0, #4] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str x9, [x1] ; GISEL-NEXT: ret ; ; CHECK32-LABEL: preidx32sext64: @@ -886,9 +1000,10 @@ define ptr @preidx16sext32(ptr %src, ptr %out) { ; ; GISEL-LABEL: preidx16sext32: ; GISEL: ; %bb.0: -; GISEL-NEXT: ldrsh w8, [x0, #2] -; GISEL-NEXT: add x0, x0, #2 -; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: add x8, x0, #2 +; GISEL-NEXT: ldrsh w9, [x0, #2] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str w9, [x1] ; GISEL-NEXT: ret ; ; CHECK32-LABEL: preidx16sext32: @@ -912,9 +1027,10 @@ define ptr @preidx16sext64(ptr %src, ptr %out) { ; ; GISEL-LABEL: preidx16sext64: ; GISEL: ; %bb.0: -; GISEL-NEXT: ldrsh x8, [x0, #2] -; GISEL-NEXT: add x0, x0, #2 -; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: add 
x8, x0, #2 +; GISEL-NEXT: ldrsh x9, [x0, #2] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str x9, [x1] ; GISEL-NEXT: ret ; ; CHECK32-LABEL: preidx16sext64: @@ -938,9 +1054,10 @@ define ptr @preidx8sext32(ptr %src, ptr %out) { ; ; GISEL-LABEL: preidx8sext32: ; GISEL: ; %bb.0: -; GISEL-NEXT: ldrsb w8, [x0, #1] -; GISEL-NEXT: add x0, x0, #1 -; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: add x8, x0, #1 +; GISEL-NEXT: ldrsb w9, [x0, #1] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str w9, [x1] ; GISEL-NEXT: ret ; ; CHECK32-LABEL: preidx8sext32: @@ -964,9 +1081,10 @@ define ptr @preidx8sext64(ptr %src, ptr %out) { ; ; GISEL-LABEL: preidx8sext64: ; GISEL: ; %bb.0: -; GISEL-NEXT: ldrsb x8, [x0, #1] -; GISEL-NEXT: add x0, x0, #1 -; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: add x8, x0, #1 +; GISEL-NEXT: ldrsb x9, [x0, #1] +; GISEL-NEXT: mov x0, x8 +; GISEL-NEXT: str x9, [x1] ; GISEL-NEXT: ret ; ; CHECK32-LABEL: preidx8sext64: diff --git a/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll b/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll index ce000021fb29b..ad6fdb6f1f9b9 100644 --- a/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll +++ b/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll @@ -22,7 +22,8 @@ define i32 @nsis_BZ2_bzDecompress(ptr %pos.i, i1 %cmp661.not3117.i, i1 %exitcond ; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: mov x9, xzr ; CHECK-NEXT: ldrb w9, [x9] -; CHECK-NEXT: strb wzr, [x0, x9] +; CHECK-NEXT: add x9, x0, x9 +; CHECK-NEXT: strb wzr, [x9] ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_4: // %for.end677.i ; CHECK-NEXT: mov w0, wzr diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll index 52007221e12a7..632fdb3910531 100644 --- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll +++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s | FileCheck %s +; RUN: llc 
-aarch64-enable-sink-fold=true < %s | FileCheck %s target triple = "aarch64-linux" declare i32 @use(...) From 2ad9a658005e6a4204d7ee617c3949632a707aa5 Mon Sep 17 00:00:00 2001 From: DianQK Date: Mon, 16 Oct 2023 06:25:23 +0800 Subject: [PATCH 170/720] [LVI][CVP] Treat undef like a full range on abs(x, false) (#68711) Fixes #68682. --- .../Scalar/CorrelatedValuePropagation.cpp | 68 ++++----- .../CorrelatedValuePropagation/abs.ll | 133 +++++++++++++++++- 2 files changed, 160 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 48b27a1ea0a29..523196e5e6eab 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -470,17 +470,17 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI); // because it is negation-invariant. static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) { Value *X = II->getArgOperand(0); - bool IsIntMinPoison = cast(II->getArgOperand(1))->isOne(); - Type *Ty = X->getType(); - Constant *IntMin = - ConstantInt::get(Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits())); - LazyValueInfo::Tristate Result; + if (!Ty->isIntegerTy()) + return false; + + bool IsIntMinPoison = cast(II->getArgOperand(1))->isOne(); + APInt IntMin = APInt::getSignedMinValue(Ty->getScalarSizeInBits()); + ConstantRange Range = LVI->getConstantRangeAtUse( + II->getOperandUse(0), /*UndefAllowed*/ IsIntMinPoison); // Is X in [0, IntMin]? NOTE: INT_MIN is fine! - Result = LVI->getPredicateAt(CmpInst::Predicate::ICMP_ULE, X, IntMin, II, - /*UseBlockValue=*/true); - if (Result == LazyValueInfo::True) { + if (Range.icmp(CmpInst::ICMP_ULE, IntMin)) { ++NumAbs; II->replaceAllUsesWith(X); II->eraseFromParent(); @@ -488,40 +488,30 @@ static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) { } // Is X in [IntMin, 0]? NOTE: INT_MIN is fine! 
- Constant *Zero = ConstantInt::getNullValue(Ty); - Result = LVI->getPredicateAt(CmpInst::Predicate::ICMP_SLE, X, Zero, II, - /*UseBlockValue=*/true); - assert(Result != LazyValueInfo::False && "Should have been handled already."); - - if (Result == LazyValueInfo::Unknown) { - // Argument's range crosses zero. - bool Changed = false; - if (!IsIntMinPoison) { - // Can we at least tell that the argument is never INT_MIN? - Result = LVI->getPredicateAt(CmpInst::Predicate::ICMP_NE, X, IntMin, II, - /*UseBlockValue=*/true); - if (Result == LazyValueInfo::True) { - ++NumNSW; - ++NumSubNSW; - II->setArgOperand(1, ConstantInt::getTrue(II->getContext())); - Changed = true; - } - } - return Changed; - } + if (Range.getSignedMax().isNonPositive()) { + IRBuilder<> B(II); + Value *NegX = B.CreateNeg(X, II->getName(), /*HasNUW=*/false, + /*HasNSW=*/IsIntMinPoison); + ++NumAbs; + II->replaceAllUsesWith(NegX); + II->eraseFromParent(); - IRBuilder<> B(II); - Value *NegX = B.CreateNeg(X, II->getName(), /*HasNUW=*/false, - /*HasNSW=*/IsIntMinPoison); - ++NumAbs; - II->replaceAllUsesWith(NegX); - II->eraseFromParent(); + // See if we can infer some no-wrap flags. + if (auto *BO = dyn_cast(NegX)) + processBinOp(BO, LVI); - // See if we can infer some no-wrap flags. - if (auto *BO = dyn_cast(NegX)) - processBinOp(BO, LVI); + return true; + } - return true; + // Argument's range crosses zero. + // Can we at least tell that the argument is never INT_MIN? + if (!IsIntMinPoison && !Range.contains(IntMin)) { + ++NumNSW; + ++NumSubNSW; + II->setArgOperand(1, ConstantInt::getTrue(II->getContext())); + return true; + } + return false; } // See if this min/max intrinsic always picks it's one specific operand. 
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/abs.ll b/llvm/test/Transforms/CorrelatedValuePropagation/abs.ll index 6231b05a851cb..7f10ce63e2fdc 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/abs.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/abs.ll @@ -2,6 +2,7 @@ ; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s declare void @llvm.assume(i1) +declare i32 @llvm.abs.i32(i32, i1) declare i8 @llvm.abs.i8(i8, i1) declare i1 @llvm.abs.i1(i1, i1) @@ -379,11 +380,139 @@ define i8 @test27(i8 %x) { define i1 @pr59887(i1 %x, i1 %c) { ; CHECK-LABEL: @pr59887( -; CHECK-NEXT: [[ABS:%.*]] = call i1 @llvm.abs.i1(i1 [[X:%.*]], i1 false) -; CHECK-NEXT: [[RES:%.*]] = select i1 [[C:%.*]], i1 [[ABS]], i1 false +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C:%.*]], i1 [[X:%.*]], i1 false ; CHECK-NEXT: ret i1 [[RES]] ; %abs = call i1 @llvm.abs.i1(i1 %x, i1 false) %res = select i1 %c, i1 %abs, i1 false ret i1 %res } + +; Because of `undef`, We can't delete `abs`. +; We can't replace the `abs` argument with true either. +define i32 @pr68381_undef_abs_false(i1 %c0, i1 %c1, i8 %v1) { +; CHECK-LABEL: @pr68381_undef_abs_false( +; CHECK-NEXT: start: +; CHECK-NEXT: br i1 [[C0:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[V1_I32:%.*]] = zext i8 [[V1:%.*]] to i32 +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[V1_I32]], [[BB0]] ], [ undef, [[START:%.*]] ] +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB0]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NEXT: ret i32 [[Z]] +; +start: + br i1 %c0, label %bb0, label %bb1 + +bb0: + %v1_i32 = zext i8 %v1 to i32 + br label %bb1 + +bb1: + %x = phi i32 [ %v1_i32, %bb0 ], [ undef, %start ] + br i1 %c1, label %bb0, label %bb2 + +bb2: + %z = call i32 @llvm.abs.i32(i32 %x, i1 false) + ret i32 %z +} + +; Because of `and`, we can delete `abs`. 
+define i32 @pr68381_undef_abs_false_and(i1 %c0, i1 %c1, i8 %v1) { +; CHECK-LABEL: @pr68381_undef_abs_false_and( +; CHECK-NEXT: start: +; CHECK-NEXT: br i1 [[C0:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[V1_I32:%.*]] = zext i8 [[V1:%.*]] to i32 +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[V1_I32]], [[BB0]] ], [ undef, [[START:%.*]] ] +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB0]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[Y:%.*]] = and i32 [[X]], 255 +; CHECK-NEXT: ret i32 [[Y]] +; +start: + br i1 %c0, label %bb0, label %bb1 + +bb0: + %v1_i32 = zext i8 %v1 to i32 + br label %bb1 + +bb1: + %x = phi i32 [ %v1_i32, %bb0 ], [ undef, %start ] + br i1 %c1, label %bb0, label %bb2 + +bb2: + %y = and i32 %x, 255 + %z = call i32 @llvm.abs.i32(i32 %y, i1 false) + ret i32 %z +} + +; Because of `undef`, we can't replace `abs` with `sub`. +define i32 @pr68381_undef_abs_false_sub(i1 %c0, i1 %c1, i32 %v1, i32 %v2) { +; CHECK-LABEL: @pr68381_undef_abs_false_sub( +; CHECK-NEXT: start: +; CHECK-NEXT: br i1 [[C0:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[V3:%.*]] = add i32 [[V1:%.*]], [[V2:%.*]] +; CHECK-NEXT: [[LIM:%.*]] = icmp sle i32 [[V3]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[LIM]]) +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[V3]], [[BB0]] ], [ undef, [[START:%.*]] ] +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB0]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NEXT: ret i32 [[Z]] +; +start: + br i1 %c0, label %bb0, label %bb1 + +bb0: + %v3 = add i32 %v1, %v2 + %lim = icmp sle i32 %v3, -1 + call void @llvm.assume(i1 %lim) + br label %bb1 + +bb1: + %x = phi i32 [ %v3, %bb0 ], [ undef, %start ] + br i1 %c1, label %bb0, label %bb2 + +bb2: + %z = call i32 @llvm.abs.i32(i32 %x, i1 false) + ret i32 %z +} + +; We can delete `abs`. 
+define i32 @pr68381_undef_abs_true(i1 %c0, i1 %c1, i8 %v1) { +; CHECK-LABEL: @pr68381_undef_abs_true( +; CHECK-NEXT: start: +; CHECK-NEXT: br i1 [[C0:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[V1_I32:%.*]] = zext i8 [[V1:%.*]] to i32 +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[V1_I32]], [[BB0]] ], [ undef, [[START:%.*]] ] +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB0]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: ret i32 [[X]] +; +start: + br i1 %c0, label %bb0, label %bb1 + +bb0: + %v1_i32 = zext i8 %v1 to i32 + br label %bb1 + +bb1: + %x = phi i32 [ %v1_i32, %bb0 ], [ undef, %start ] + br i1 %c1, label %bb0, label %bb2 + +bb2: + %z = call i32 @llvm.abs.i32(i32 %x, i1 true) + ret i32 %z +} From 6f46bcc609f14121e6942763ba9871f98541ea0e Mon Sep 17 00:00:00 2001 From: Jared Grubb Date: Mon, 2 Oct 2023 16:31:03 -0700 Subject: [PATCH 171/720] [clang-format] Treat AttributeMacro more like __attribute__ There are two parts to this fix: - Annotate the paren after an AttributeMacro as an AttributeLParen. - Treat an AttributeMacro-without-paren the same as one with a paren. I added a new test-case to differentiate a macro that is or is-not an AttributeMacro; also handled whether ColumnLimit is set to infinite (0) or a finite value, as part of this patch is in ContinuationIndenter. Closes #68722. 
Differential Revision: https://reviews.llvm.org/D145262 --- clang/lib/Format/ContinuationIndenter.cpp | 5 +- clang/lib/Format/TokenAnnotator.cpp | 8 +- clang/unittests/Format/FormatTestObjC.cpp | 214 +++++++++++++++++- clang/unittests/Format/TokenAnnotatorTest.cpp | 110 +++++++++ 4 files changed, 332 insertions(+), 5 deletions(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 91ce825224d7f..928c30364bfcf 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1336,8 +1336,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { (PreviousNonComment->ClosesTemplateDeclaration || PreviousNonComment->ClosesRequiresClause || PreviousNonComment->isOneOf( - TT_AttributeRParen, TT_AttributeSquare, TT_FunctionAnnotationRParen, - TT_JavaAnnotation, TT_LeadingJavaAnnotation))) || + TT_AttributeRParen, TT_AttributeMacro, TT_AttributeSquare, + TT_FunctionAnnotationRParen, TT_JavaAnnotation, + TT_LeadingJavaAnnotation))) || (!Style.IndentWrappedFunctionNames && NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName))) { return std::max(CurrentState.LastSpace, CurrentState.Indent); diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 543c119620bf2..0c642594053fa 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4709,7 +4709,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Line.Type == LT_ObjCMethodDecl) { if (Left.is(TT_ObjCMethodSpecifier)) return true; - if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right)) { + // Apply this logic for parens that are not function attribute macros. + if (Left.is(tok::r_paren) && Left.isNot(TT_AttributeRParen) && + canBeObjCSelectorComponent(Right)) { // Don't space between ')' and or ')' and 'new'. 
'new' is not a // keyword in Objective-C, and '+ (instancetype)new;' is a standard class // method declaration. @@ -5222,8 +5224,10 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, } // Ensure wrapping after __attribute__((XX)) and @interface etc. - if (Left.is(TT_AttributeRParen) && Right.is(TT_ObjCDecl)) + if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) && + Right.is(TT_ObjCDecl)) { return true; + } if (Left.is(TT_LambdaLBrace)) { if (IsFunctionArgument(Left) && diff --git a/clang/unittests/Format/FormatTestObjC.cpp b/clang/unittests/Format/FormatTestObjC.cpp index a9e5434dfabfb..84a3d240055ff 100644 --- a/clang/unittests/Format/FormatTestObjC.cpp +++ b/clang/unittests/Format/FormatTestObjC.cpp @@ -1527,7 +1527,10 @@ TEST_F(FormatTestObjC, IfNotUnlikely) { " [obj func:arg2];"); } -TEST_F(FormatTestObjC, Attributes) { +TEST_F(FormatTestObjC, AttributesOnObjCDecl) { + Style.AttributeMacros.push_back("ATTRIBUTE_MACRO"); + + // Check '__attribute__' macro directly. verifyFormat("__attribute__((objc_subclassing_restricted))\n" "@interface Foo\n" "@end"); @@ -1537,6 +1540,215 @@ TEST_F(FormatTestObjC, Attributes) { verifyFormat("__attribute__((objc_subclassing_restricted))\n" "@implementation Foo\n" "@end"); + + // Check AttributeMacro gets treated the same, with or without parentheses. + verifyFormat("ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + + // Indenter also needs to understand multiple attribute macros. + // Try each of the three kinds paired with each of the other kind. + + // Column limit, but no reflow. 
+ verifyFormat("ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO __attribute__((X))\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO(X) __attribute__((X))\n" + "@interface Foo\n" + "@end"); + + // Column limit that requires reflow. + Style.ColumnLimit = 30; + verifyFormat("ATTRIBUTE_MACRO(X)\n" + "ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO\n" + "ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X))\n" + "ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO\n" + "__attribute__((X))\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X))\n" + "ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO(X)\n" + "__attribute__((X))\n" + "@interface Foo\n" + "@end"); + + // No column limit + Style.ColumnLimit = 0; + verifyFormat("ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO __attribute__((X))\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO(X) __attribute__((X))\n" + "@interface Foo\n" + "@end"); +} + +TEST_F(FormatTestObjC, AttributesOnObjCMethodDecl) { + Style.AttributeMacros.push_back("ATTRIBUTE_MACRO"); + + // Check '__attribute__' macro directly. 
+ verifyFormat("- (id)init __attribute__((objc_designated_initializer));"); + + // Check AttributeMacro gets treated the same, with or without parentheses. + verifyFormat("- (id)init ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO(X);"); + + // Indenter also needs to understand multiple attribute macros. + + // Column limit (default), but no reflow. + verifyFormat("- (id)init ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO __attribute__((X));"); + verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init ATTRIBUTE_MACRO(X) __attribute__((X));"); + + // Column limit that requires reflow. + Style.ColumnLimit = 30; + + // Reflow after method name. + verifyFormat("- (id)initWithReallyLongName\n" + " __attribute__((X))\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("- (id)initWithReallyLongName\n" + " ATTRIBUTE_MACRO(X)\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("- (id)initWithReallyLongName\n" + " ATTRIBUTE_MACRO\n" + " ATTRIBUTE_MACRO;"); + // Reflow after first macro. + // FIXME: these should indent but don't. +#if 0 + verifyFormat("- (id)init ATTRIBUTE_MACRO(X)\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO\n" + " ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init __attribute__((X))\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO\n" + " __attribute__((X));"); + verifyFormat("- (id)init __attribute__((X))\n" + " ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init ATTRIBUTE_MACRO(X)\n" + " __attribute__((X));"); +#endif + + // No column limit. 
+ Style.ColumnLimit = 0; + verifyFormat("- (id)init ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO __attribute__((X));"); + verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init ATTRIBUTE_MACRO(X) __attribute__((X));"); +} + +TEST_F(FormatTestObjC, AttributesOnObjCProperty) { + Style.AttributeMacros.push_back("ATTRIBUTE_MACRO"); + + // Check '__attribute__' macro directly. + verifyFormat("@property(weak) id delegate " + "__attribute__((objc_designated_initializer));"); + + // Check AttributeMacro gets treated the same, with or without parentheses. + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO;"); + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO(X);"); + + // Indenter also needs to understand multiple attribute macros. + + // Column limit (default), but no reflow. + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); + verifyFormat( + "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO;"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO __attribute__((X));"); + verifyFormat( + "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO(X);"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO(X) __attribute__((X));"); + + // Column limit that requires reflow. + Style.ColumnLimit = 50; + + // Reflow after method name. + verifyFormat("@property(weak) id delegateWithLongName\n" + " __attribute__((X)) ATTRIBUTE_MACRO;"); + verifyFormat("@property(weak) id delegateWithLongName\n" + " ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); + verifyFormat("@property(weak) id delegateWithLongName\n" + " ATTRIBUTE_MACRO ATTRIBUTE_MACRO;"); + // Reflow after first macro. 
+ // FIXME: these should indent but don't. +#if 0 + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO(X)\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO\n" + " ATTRIBUTE_MACRO(X);"); + verifyFormat("@property(weak) id delegate __attribute__((X))\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO\n" + " __attribute__((X));"); + verifyFormat("@property(weak) id delegate __attribute__((X))\n" + " ATTRIBUTE_MACRO(X);"); + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO(X)\n" + " __attribute__((X));"); +#endif + + // No column limit. + Style.ColumnLimit = 0; + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); + verifyFormat( + "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO;"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO __attribute__((X));"); + verifyFormat( + "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO(X);"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO(X) __attribute__((X));"); } } // end namespace diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index b6d4cf166de02..e5cc3ed3686b3 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1795,6 +1795,116 @@ TEST_F(TokenAnnotatorTest, UnderstandsTrailingReturnArrow) { EXPECT_TOKEN(Tokens[13], tok::arrow, TT_Unknown); } +TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacros) { + // '__attribute__' has special handling. 
+ auto Tokens = annotate("__attribute__(X) void Foo(void);"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::kw___attribute, TT_Unknown); + EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); + + // Generic macro has no special handling in this location. + Tokens = annotate("A(X) void Foo(void);"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_Unknown); + + // Add a custom AttributeMacro. Test that it has the same behavior. + FormatStyle Style = getLLVMStyle(); + Style.AttributeMacros.push_back("A"); + + // An "AttributeMacro" gets annotated like '__attribute__'. + Tokens = annotate("A(X) void Foo(void);", Style); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::identifier, TT_AttributeMacro); + EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); +} + +TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacrosOnObjCDecl) { + // '__attribute__' has special handling. + auto Tokens = annotate("__attribute__(X) @interface Foo"); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::kw___attribute, TT_Unknown); + EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); + + // Generic macro has no special handling in this location. + Tokens = annotate("A(X) @interface Foo"); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + // Note: Don't check token-type as a random token in this position is hard to + // reason about. + EXPECT_TOKEN_KIND(Tokens[0], tok::identifier); + EXPECT_TOKEN_KIND(Tokens[1], tok::l_paren); + + // Add a custom AttributeMacro. Test that it has the same behavior. + FormatStyle Style = getLLVMStyle(); + Style.AttributeMacros.push_back("A"); + + // An "AttributeMacro" gets annotated like '__attribute__'. 
+ Tokens = annotate("A(X) @interface Foo", Style); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::identifier, TT_AttributeMacro); + EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); +} + +TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacrosOnObjCMethodDecl) { + // '__attribute__' has special handling. + auto Tokens = annotate("- (id)init __attribute__(X);"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::kw___attribute, TT_Unknown); + EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_AttributeRParen); + + // Generic macro has no special handling in this location. + Tokens = annotate("- (id)init A(X);"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + // Note: Don't check token-type as a random token in this position is hard to + // reason about. + EXPECT_TOKEN_KIND(Tokens[5], tok::identifier); + EXPECT_TOKEN_KIND(Tokens[6], tok::l_paren); + + // Add a custom AttributeMacro. Test that it has the same behavior. + FormatStyle Style = getLLVMStyle(); + Style.AttributeMacros.push_back("A"); + + // An "AttributeMacro" gets annotated like '__attribute__'. + Tokens = annotate("- (id)init A(X);", Style); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::identifier, TT_AttributeMacro); + EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_AttributeRParen); +} + +TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacrosOnObjCProperty) { + // '__attribute__' has special handling. + auto Tokens = annotate("@property(weak) id delegate __attribute__(X);"); + ASSERT_EQ(Tokens.size(), 13u) << Tokens; + EXPECT_TOKEN(Tokens[7], tok::kw___attribute, TT_Unknown); + EXPECT_TOKEN(Tokens[8], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_AttributeRParen); + + // Generic macro has no special handling in this location. 
+ Tokens = annotate("@property(weak) id delegate A(X);"); + ASSERT_EQ(Tokens.size(), 13u) << Tokens; + // Note: Don't check token-type as a random token in this position is hard to + // reason about. + EXPECT_TOKEN_KIND(Tokens[7], tok::identifier); + EXPECT_TOKEN_KIND(Tokens[8], tok::l_paren); + + // Add a custom AttributeMacro. Test that it has the same behavior. + FormatStyle Style = getLLVMStyle(); + Style.AttributeMacros.push_back("A"); + + // An "AttributeMacro" gets annotated like '__attribute__'. + Tokens = annotate("@property(weak) id delegate A(X);", Style); + ASSERT_EQ(Tokens.size(), 13u) << Tokens; + EXPECT_TOKEN(Tokens[7], tok::identifier, TT_AttributeMacro); + EXPECT_TOKEN(Tokens[8], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_AttributeRParen); +} + TEST_F(TokenAnnotatorTest, UnderstandsVerilogOperators) { auto Annotate = [this](llvm::StringRef Code) { return annotate(Code, getLLVMStyle(FormatStyle::LK_Verilog)); From 6c7cf74a75572c3cc5d9979f02b67a7357e9c656 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Sun, 15 Oct 2023 15:52:17 -0700 Subject: [PATCH 172/720] Revert "[clang-format] Treat AttributeMacro more like __attribute__" This reverts commit 6f46bcc609f14121e6942763ba9871f98541ea0e. 
--- clang/lib/Format/ContinuationIndenter.cpp | 5 +- clang/lib/Format/TokenAnnotator.cpp | 8 +- clang/unittests/Format/FormatTestObjC.cpp | 214 +----------------- clang/unittests/Format/TokenAnnotatorTest.cpp | 110 --------- 4 files changed, 5 insertions(+), 332 deletions(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 928c30364bfcf..91ce825224d7f 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1336,9 +1336,8 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { (PreviousNonComment->ClosesTemplateDeclaration || PreviousNonComment->ClosesRequiresClause || PreviousNonComment->isOneOf( - TT_AttributeRParen, TT_AttributeMacro, TT_AttributeSquare, - TT_FunctionAnnotationRParen, TT_JavaAnnotation, - TT_LeadingJavaAnnotation))) || + TT_AttributeRParen, TT_AttributeSquare, TT_FunctionAnnotationRParen, + TT_JavaAnnotation, TT_LeadingJavaAnnotation))) || (!Style.IndentWrappedFunctionNames && NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName))) { return std::max(CurrentState.LastSpace, CurrentState.Indent); diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 0c642594053fa..543c119620bf2 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4709,9 +4709,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Line.Type == LT_ObjCMethodDecl) { if (Left.is(TT_ObjCMethodSpecifier)) return true; - // Apply this logic for parens that are not function attribute macros. - if (Left.is(tok::r_paren) && Left.isNot(TT_AttributeRParen) && - canBeObjCSelectorComponent(Right)) { + if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right)) { // Don't space between ')' and or ')' and 'new'. 'new' is not a // keyword in Objective-C, and '+ (instancetype)new;' is a standard class // method declaration. 
@@ -5224,10 +5222,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, } // Ensure wrapping after __attribute__((XX)) and @interface etc. - if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) && - Right.is(TT_ObjCDecl)) { + if (Left.is(TT_AttributeRParen) && Right.is(TT_ObjCDecl)) return true; - } if (Left.is(TT_LambdaLBrace)) { if (IsFunctionArgument(Left) && diff --git a/clang/unittests/Format/FormatTestObjC.cpp b/clang/unittests/Format/FormatTestObjC.cpp index 84a3d240055ff..a9e5434dfabfb 100644 --- a/clang/unittests/Format/FormatTestObjC.cpp +++ b/clang/unittests/Format/FormatTestObjC.cpp @@ -1527,10 +1527,7 @@ TEST_F(FormatTestObjC, IfNotUnlikely) { " [obj func:arg2];"); } -TEST_F(FormatTestObjC, AttributesOnObjCDecl) { - Style.AttributeMacros.push_back("ATTRIBUTE_MACRO"); - - // Check '__attribute__' macro directly. +TEST_F(FormatTestObjC, Attributes) { verifyFormat("__attribute__((objc_subclassing_restricted))\n" "@interface Foo\n" "@end"); @@ -1540,215 +1537,6 @@ TEST_F(FormatTestObjC, AttributesOnObjCDecl) { verifyFormat("__attribute__((objc_subclassing_restricted))\n" "@implementation Foo\n" "@end"); - - // Check AttributeMacro gets treated the same, with or without parentheses. - verifyFormat("ATTRIBUTE_MACRO\n" - "@interface Foo\n" - "@end"); - verifyFormat("ATTRIBUTE_MACRO(X)\n" - "@interface Foo\n" - "@end"); - - // Indenter also needs to understand multiple attribute macros. - // Try each of the three kinds paired with each of the other kind. - - // Column limit, but no reflow. 
- verifyFormat("ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO\n" - "@interface Foo\n" - "@end"); - verifyFormat("ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X)\n" - "@interface Foo\n" - "@end"); - verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO\n" - "@interface Foo\n" - "@end"); - verifyFormat("ATTRIBUTE_MACRO __attribute__((X))\n" - "@interface Foo\n" - "@end"); - verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO(X)\n" - "@interface Foo\n" - "@end"); - verifyFormat("ATTRIBUTE_MACRO(X) __attribute__((X))\n" - "@interface Foo\n" - "@end"); - - // Column limit that requires reflow. - Style.ColumnLimit = 30; - verifyFormat("ATTRIBUTE_MACRO(X)\n" - "ATTRIBUTE_MACRO\n" - "@interface Foo\n" - "@end"); - verifyFormat("ATTRIBUTE_MACRO\n" - "ATTRIBUTE_MACRO(X)\n" - "@interface Foo\n" - "@end"); - verifyFormat("__attribute__((X))\n" - "ATTRIBUTE_MACRO\n" - "@interface Foo\n" - "@end"); - verifyFormat("ATTRIBUTE_MACRO\n" - "__attribute__((X))\n" - "@interface Foo\n" - "@end"); - verifyFormat("__attribute__((X))\n" - "ATTRIBUTE_MACRO(X)\n" - "@interface Foo\n" - "@end"); - verifyFormat("ATTRIBUTE_MACRO(X)\n" - "__attribute__((X))\n" - "@interface Foo\n" - "@end"); - - // No column limit - Style.ColumnLimit = 0; - verifyFormat("ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO\n" - "@interface Foo\n" - "@end"); - verifyFormat("ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X)\n" - "@interface Foo\n" - "@end"); - verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO\n" - "@interface Foo\n" - "@end"); - verifyFormat("ATTRIBUTE_MACRO __attribute__((X))\n" - "@interface Foo\n" - "@end"); - verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO(X)\n" - "@interface Foo\n" - "@end"); - verifyFormat("ATTRIBUTE_MACRO(X) __attribute__((X))\n" - "@interface Foo\n" - "@end"); -} - -TEST_F(FormatTestObjC, AttributesOnObjCMethodDecl) { - Style.AttributeMacros.push_back("ATTRIBUTE_MACRO"); - - // Check '__attribute__' macro directly. 
- verifyFormat("- (id)init __attribute__((objc_designated_initializer));"); - - // Check AttributeMacro gets treated the same, with or without parentheses. - verifyFormat("- (id)init ATTRIBUTE_MACRO;"); - verifyFormat("- (id)init ATTRIBUTE_MACRO(X);"); - - // Indenter also needs to understand multiple attribute macros. - - // Column limit (default), but no reflow. - verifyFormat("- (id)init ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); - verifyFormat("- (id)init ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); - verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO;"); - verifyFormat("- (id)init ATTRIBUTE_MACRO __attribute__((X));"); - verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO(X);"); - verifyFormat("- (id)init ATTRIBUTE_MACRO(X) __attribute__((X));"); - - // Column limit that requires reflow. - Style.ColumnLimit = 30; - - // Reflow after method name. - verifyFormat("- (id)initWithReallyLongName\n" - " __attribute__((X))\n" - " ATTRIBUTE_MACRO;"); - verifyFormat("- (id)initWithReallyLongName\n" - " ATTRIBUTE_MACRO(X)\n" - " ATTRIBUTE_MACRO;"); - verifyFormat("- (id)initWithReallyLongName\n" - " ATTRIBUTE_MACRO\n" - " ATTRIBUTE_MACRO;"); - // Reflow after first macro. - // FIXME: these should indent but don't. -#if 0 - verifyFormat("- (id)init ATTRIBUTE_MACRO(X)\n" - " ATTRIBUTE_MACRO;"); - verifyFormat("- (id)init ATTRIBUTE_MACRO\n" - " ATTRIBUTE_MACRO(X);"); - verifyFormat("- (id)init __attribute__((X))\n" - " ATTRIBUTE_MACRO;"); - verifyFormat("- (id)init ATTRIBUTE_MACRO\n" - " __attribute__((X));"); - verifyFormat("- (id)init __attribute__((X))\n" - " ATTRIBUTE_MACRO(X);"); - verifyFormat("- (id)init ATTRIBUTE_MACRO(X)\n" - " __attribute__((X));"); -#endif - - // No column limit. 
- Style.ColumnLimit = 0; - verifyFormat("- (id)init ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); - verifyFormat("- (id)init ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); - verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO;"); - verifyFormat("- (id)init ATTRIBUTE_MACRO __attribute__((X));"); - verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO(X);"); - verifyFormat("- (id)init ATTRIBUTE_MACRO(X) __attribute__((X));"); -} - -TEST_F(FormatTestObjC, AttributesOnObjCProperty) { - Style.AttributeMacros.push_back("ATTRIBUTE_MACRO"); - - // Check '__attribute__' macro directly. - verifyFormat("@property(weak) id delegate " - "__attribute__((objc_designated_initializer));"); - - // Check AttributeMacro gets treated the same, with or without parentheses. - verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO;"); - verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO(X);"); - - // Indenter also needs to understand multiple attribute macros. - - // Column limit (default), but no reflow. - verifyFormat( - "@property(weak) id delegate ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); - verifyFormat( - "@property(weak) id delegate ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); - verifyFormat( - "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO;"); - verifyFormat( - "@property(weak) id delegate ATTRIBUTE_MACRO __attribute__((X));"); - verifyFormat( - "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO(X);"); - verifyFormat( - "@property(weak) id delegate ATTRIBUTE_MACRO(X) __attribute__((X));"); - - // Column limit that requires reflow. - Style.ColumnLimit = 50; - - // Reflow after method name. - verifyFormat("@property(weak) id delegateWithLongName\n" - " __attribute__((X)) ATTRIBUTE_MACRO;"); - verifyFormat("@property(weak) id delegateWithLongName\n" - " ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); - verifyFormat("@property(weak) id delegateWithLongName\n" - " ATTRIBUTE_MACRO ATTRIBUTE_MACRO;"); - // Reflow after first macro. 
- // FIXME: these should indent but don't. -#if 0 - verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO(X)\n" - " ATTRIBUTE_MACRO;"); - verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO\n" - " ATTRIBUTE_MACRO(X);"); - verifyFormat("@property(weak) id delegate __attribute__((X))\n" - " ATTRIBUTE_MACRO;"); - verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO\n" - " __attribute__((X));"); - verifyFormat("@property(weak) id delegate __attribute__((X))\n" - " ATTRIBUTE_MACRO(X);"); - verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO(X)\n" - " __attribute__((X));"); -#endif - - // No column limit. - Style.ColumnLimit = 0; - verifyFormat( - "@property(weak) id delegate ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); - verifyFormat( - "@property(weak) id delegate ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); - verifyFormat( - "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO;"); - verifyFormat( - "@property(weak) id delegate ATTRIBUTE_MACRO __attribute__((X));"); - verifyFormat( - "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO(X);"); - verifyFormat( - "@property(weak) id delegate ATTRIBUTE_MACRO(X) __attribute__((X));"); } } // end namespace diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index e5cc3ed3686b3..b6d4cf166de02 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1795,116 +1795,6 @@ TEST_F(TokenAnnotatorTest, UnderstandsTrailingReturnArrow) { EXPECT_TOKEN(Tokens[13], tok::arrow, TT_Unknown); } -TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacros) { - // '__attribute__' has special handling. 
- auto Tokens = annotate("__attribute__(X) void Foo(void);"); - ASSERT_EQ(Tokens.size(), 11u) << Tokens; - EXPECT_TOKEN(Tokens[0], tok::kw___attribute, TT_Unknown); - EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); - EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); - - // Generic macro has no special handling in this location. - Tokens = annotate("A(X) void Foo(void);"); - ASSERT_EQ(Tokens.size(), 11u) << Tokens; - EXPECT_TOKEN(Tokens[0], tok::identifier, TT_Unknown); - EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_Unknown); - - // Add a custom AttributeMacro. Test that it has the same behavior. - FormatStyle Style = getLLVMStyle(); - Style.AttributeMacros.push_back("A"); - - // An "AttributeMacro" gets annotated like '__attribute__'. - Tokens = annotate("A(X) void Foo(void);", Style); - ASSERT_EQ(Tokens.size(), 11u) << Tokens; - EXPECT_TOKEN(Tokens[0], tok::identifier, TT_AttributeMacro); - EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); - EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); -} - -TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacrosOnObjCDecl) { - // '__attribute__' has special handling. - auto Tokens = annotate("__attribute__(X) @interface Foo"); - ASSERT_EQ(Tokens.size(), 8u) << Tokens; - EXPECT_TOKEN(Tokens[0], tok::kw___attribute, TT_Unknown); - EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); - EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); - - // Generic macro has no special handling in this location. - Tokens = annotate("A(X) @interface Foo"); - ASSERT_EQ(Tokens.size(), 8u) << Tokens; - // Note: Don't check token-type as a random token in this position is hard to - // reason about. - EXPECT_TOKEN_KIND(Tokens[0], tok::identifier); - EXPECT_TOKEN_KIND(Tokens[1], tok::l_paren); - - // Add a custom AttributeMacro. Test that it has the same behavior. - FormatStyle Style = getLLVMStyle(); - Style.AttributeMacros.push_back("A"); - - // An "AttributeMacro" gets annotated like '__attribute__'. 
- Tokens = annotate("A(X) @interface Foo", Style); - ASSERT_EQ(Tokens.size(), 8u) << Tokens; - EXPECT_TOKEN(Tokens[0], tok::identifier, TT_AttributeMacro); - EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); - EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); -} - -TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacrosOnObjCMethodDecl) { - // '__attribute__' has special handling. - auto Tokens = annotate("- (id)init __attribute__(X);"); - ASSERT_EQ(Tokens.size(), 11u) << Tokens; - EXPECT_TOKEN(Tokens[5], tok::kw___attribute, TT_Unknown); - EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_AttributeLParen); - EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_AttributeRParen); - - // Generic macro has no special handling in this location. - Tokens = annotate("- (id)init A(X);"); - ASSERT_EQ(Tokens.size(), 11u) << Tokens; - // Note: Don't check token-type as a random token in this position is hard to - // reason about. - EXPECT_TOKEN_KIND(Tokens[5], tok::identifier); - EXPECT_TOKEN_KIND(Tokens[6], tok::l_paren); - - // Add a custom AttributeMacro. Test that it has the same behavior. - FormatStyle Style = getLLVMStyle(); - Style.AttributeMacros.push_back("A"); - - // An "AttributeMacro" gets annotated like '__attribute__'. - Tokens = annotate("- (id)init A(X);", Style); - ASSERT_EQ(Tokens.size(), 11u) << Tokens; - EXPECT_TOKEN(Tokens[5], tok::identifier, TT_AttributeMacro); - EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_AttributeLParen); - EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_AttributeRParen); -} - -TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacrosOnObjCProperty) { - // '__attribute__' has special handling. - auto Tokens = annotate("@property(weak) id delegate __attribute__(X);"); - ASSERT_EQ(Tokens.size(), 13u) << Tokens; - EXPECT_TOKEN(Tokens[7], tok::kw___attribute, TT_Unknown); - EXPECT_TOKEN(Tokens[8], tok::l_paren, TT_AttributeLParen); - EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_AttributeRParen); - - // Generic macro has no special handling in this location. 
- Tokens = annotate("@property(weak) id delegate A(X);"); - ASSERT_EQ(Tokens.size(), 13u) << Tokens; - // Note: Don't check token-type as a random token in this position is hard to - // reason about. - EXPECT_TOKEN_KIND(Tokens[7], tok::identifier); - EXPECT_TOKEN_KIND(Tokens[8], tok::l_paren); - - // Add a custom AttributeMacro. Test that it has the same behavior. - FormatStyle Style = getLLVMStyle(); - Style.AttributeMacros.push_back("A"); - - // An "AttributeMacro" gets annotated like '__attribute__'. - Tokens = annotate("@property(weak) id delegate A(X);", Style); - ASSERT_EQ(Tokens.size(), 13u) << Tokens; - EXPECT_TOKEN(Tokens[7], tok::identifier, TT_AttributeMacro); - EXPECT_TOKEN(Tokens[8], tok::l_paren, TT_AttributeLParen); - EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_AttributeRParen); -} - TEST_F(TokenAnnotatorTest, UnderstandsVerilogOperators) { auto Annotate = [this](llvm::StringRef Code) { return annotate(Code, getLLVMStyle(FormatStyle::LK_Verilog)); From 7f881a2abe2c3eceeae0272fc41ba0a237770450 Mon Sep 17 00:00:00 2001 From: Jared Grubb Date: Tue, 10 Oct 2023 13:58:41 -0700 Subject: [PATCH 173/720] [clang-format] Treat AttributeMacro more like __attribute__ There are two parts to this fix: - Annotate the paren after an AttributeMacro as an AttributeLParen. - Treat an AttributeMacro-without-paren the same as one with a paren. I added a new test-case to differentiate a macro that is or is-not an AttributeMacro; also handled whether ColumnLimit is set to infinite (0) or a finite value, as part of this patch is in ContinuationIndenter. Closes #68722. 
Differential Revision: https://reviews.llvm.org/D145262 --- clang/lib/Format/ContinuationIndenter.cpp | 2 + clang/lib/Format/TokenAnnotator.cpp | 11 +- clang/unittests/Format/FormatTest.cpp | 3 + clang/unittests/Format/FormatTestObjC.cpp | 214 +++++++++++++++++- clang/unittests/Format/TokenAnnotatorTest.cpp | 110 +++++++++ 5 files changed, 336 insertions(+), 4 deletions(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 91ce825224d7f..3b28f84fd8417 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1335,6 +1335,8 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if ((PreviousNonComment && (PreviousNonComment->ClosesTemplateDeclaration || PreviousNonComment->ClosesRequiresClause || + (PreviousNonComment->is(TT_AttributeMacro) && + Current.isNot(tok::l_paren)) || PreviousNonComment->isOneOf( TT_AttributeRParen, TT_AttributeSquare, TT_FunctionAnnotationRParen, TT_JavaAnnotation, TT_LeadingJavaAnnotation))) || diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 543c119620bf2..3dd537272e9da 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4393,8 +4393,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return false; } // Space in __attribute__((attr)) ::type. 
- if (Left.is(TT_AttributeRParen) && Right.is(tok::coloncolon)) + if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) && + Right.is(tok::coloncolon)) { return true; + } if (Left.is(tok::kw_operator)) return Right.is(tok::coloncolon); @@ -4709,7 +4711,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Line.Type == LT_ObjCMethodDecl) { if (Left.is(TT_ObjCMethodSpecifier)) return true; - if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right)) { + if (Left.is(tok::r_paren) && Left.isNot(TT_AttributeRParen) && + canBeObjCSelectorComponent(Right)) { // Don't space between ')' and or ')' and 'new'. 'new' is not a // keyword in Objective-C, and '+ (instancetype)new;' is a standard class // method declaration. @@ -5222,8 +5225,10 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, } // Ensure wrapping after __attribute__((XX)) and @interface etc. - if (Left.is(TT_AttributeRParen) && Right.is(TT_ObjCDecl)) + if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) && + Right.is(TT_ObjCDecl)) { return true; + } if (Left.is(TT_LambdaLBrace)) { if (IsFunctionArgument(Left) && diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 2ef3c9b299bca..963fb8f4d4416 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -11674,6 +11674,9 @@ TEST_F(FormatTest, UnderstandsAttributes) { verifyFormat("vector v;", CustomAttrs); verifyFormat("vector v;", CustomAttrs); verifyFormat("vector v;", CustomAttrs); + verifyFormat("__attr1 ::qualified_type f();", CustomAttrs); + verifyFormat("__attr1() ::qualified_type f();", CustomAttrs); + verifyFormat("__attr1(nodebug) ::qualified_type f();", CustomAttrs); // Check that these are not parsed as function declarations: CustomAttrs.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; diff --git a/clang/unittests/Format/FormatTestObjC.cpp b/clang/unittests/Format/FormatTestObjC.cpp index 
a9e5434dfabfb..84a3d240055ff 100644 --- a/clang/unittests/Format/FormatTestObjC.cpp +++ b/clang/unittests/Format/FormatTestObjC.cpp @@ -1527,7 +1527,10 @@ TEST_F(FormatTestObjC, IfNotUnlikely) { " [obj func:arg2];"); } -TEST_F(FormatTestObjC, Attributes) { +TEST_F(FormatTestObjC, AttributesOnObjCDecl) { + Style.AttributeMacros.push_back("ATTRIBUTE_MACRO"); + + // Check '__attribute__' macro directly. verifyFormat("__attribute__((objc_subclassing_restricted))\n" "@interface Foo\n" "@end"); @@ -1537,6 +1540,215 @@ TEST_F(FormatTestObjC, Attributes) { verifyFormat("__attribute__((objc_subclassing_restricted))\n" "@implementation Foo\n" "@end"); + + // Check AttributeMacro gets treated the same, with or without parentheses. + verifyFormat("ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + + // Indenter also needs to understand multiple attribute macros. + // Try each of the three kinds paired with each of the other kind. + + // Column limit, but no reflow. + verifyFormat("ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO __attribute__((X))\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO(X) __attribute__((X))\n" + "@interface Foo\n" + "@end"); + + // Column limit that requires reflow. 
+ Style.ColumnLimit = 30; + verifyFormat("ATTRIBUTE_MACRO(X)\n" + "ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO\n" + "ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X))\n" + "ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO\n" + "__attribute__((X))\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X))\n" + "ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO(X)\n" + "__attribute__((X))\n" + "@interface Foo\n" + "@end"); + + // No column limit + Style.ColumnLimit = 0; + verifyFormat("ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO __attribute__((X))\n" + "@interface Foo\n" + "@end"); + verifyFormat("__attribute__((X)) ATTRIBUTE_MACRO(X)\n" + "@interface Foo\n" + "@end"); + verifyFormat("ATTRIBUTE_MACRO(X) __attribute__((X))\n" + "@interface Foo\n" + "@end"); +} + +TEST_F(FormatTestObjC, AttributesOnObjCMethodDecl) { + Style.AttributeMacros.push_back("ATTRIBUTE_MACRO"); + + // Check '__attribute__' macro directly. + verifyFormat("- (id)init __attribute__((objc_designated_initializer));"); + + // Check AttributeMacro gets treated the same, with or without parentheses. + verifyFormat("- (id)init ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO(X);"); + + // Indenter also needs to understand multiple attribute macros. + + // Column limit (default), but no reflow. 
+ verifyFormat("- (id)init ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO __attribute__((X));"); + verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init ATTRIBUTE_MACRO(X) __attribute__((X));"); + + // Column limit that requires reflow. + Style.ColumnLimit = 30; + + // Reflow after method name. + verifyFormat("- (id)initWithReallyLongName\n" + " __attribute__((X))\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("- (id)initWithReallyLongName\n" + " ATTRIBUTE_MACRO(X)\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("- (id)initWithReallyLongName\n" + " ATTRIBUTE_MACRO\n" + " ATTRIBUTE_MACRO;"); + // Reflow after first macro. + // FIXME: these should indent but don't. +#if 0 + verifyFormat("- (id)init ATTRIBUTE_MACRO(X)\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO\n" + " ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init __attribute__((X))\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO\n" + " __attribute__((X));"); + verifyFormat("- (id)init __attribute__((X))\n" + " ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init ATTRIBUTE_MACRO(X)\n" + " __attribute__((X));"); +#endif + + // No column limit. + Style.ColumnLimit = 0; + verifyFormat("- (id)init ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO;"); + verifyFormat("- (id)init ATTRIBUTE_MACRO __attribute__((X));"); + verifyFormat("- (id)init __attribute__((X)) ATTRIBUTE_MACRO(X);"); + verifyFormat("- (id)init ATTRIBUTE_MACRO(X) __attribute__((X));"); +} + +TEST_F(FormatTestObjC, AttributesOnObjCProperty) { + Style.AttributeMacros.push_back("ATTRIBUTE_MACRO"); + + // Check '__attribute__' macro directly. 
+ verifyFormat("@property(weak) id delegate " + "__attribute__((objc_designated_initializer));"); + + // Check AttributeMacro gets treated the same, with or without parentheses. + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO;"); + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO(X);"); + + // Indenter also needs to understand multiple attribute macros. + + // Column limit (default), but no reflow. + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); + verifyFormat( + "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO;"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO __attribute__((X));"); + verifyFormat( + "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO(X);"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO(X) __attribute__((X));"); + + // Column limit that requires reflow. + Style.ColumnLimit = 50; + + // Reflow after method name. + verifyFormat("@property(weak) id delegateWithLongName\n" + " __attribute__((X)) ATTRIBUTE_MACRO;"); + verifyFormat("@property(weak) id delegateWithLongName\n" + " ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); + verifyFormat("@property(weak) id delegateWithLongName\n" + " ATTRIBUTE_MACRO ATTRIBUTE_MACRO;"); + // Reflow after first macro. + // FIXME: these should indent but don't. 
+#if 0 + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO(X)\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO\n" + " ATTRIBUTE_MACRO(X);"); + verifyFormat("@property(weak) id delegate __attribute__((X))\n" + " ATTRIBUTE_MACRO;"); + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO\n" + " __attribute__((X));"); + verifyFormat("@property(weak) id delegate __attribute__((X))\n" + " ATTRIBUTE_MACRO(X);"); + verifyFormat("@property(weak) id delegate ATTRIBUTE_MACRO(X)\n" + " __attribute__((X));"); +#endif + + // No column limit. + Style.ColumnLimit = 0; + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO(X) ATTRIBUTE_MACRO;"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO ATTRIBUTE_MACRO(X);"); + verifyFormat( + "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO;"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO __attribute__((X));"); + verifyFormat( + "@property(weak) id delegate __attribute__((X)) ATTRIBUTE_MACRO(X);"); + verifyFormat( + "@property(weak) id delegate ATTRIBUTE_MACRO(X) __attribute__((X));"); } } // end namespace diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index b6d4cf166de02..e5cc3ed3686b3 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1795,6 +1795,116 @@ TEST_F(TokenAnnotatorTest, UnderstandsTrailingReturnArrow) { EXPECT_TOKEN(Tokens[13], tok::arrow, TT_Unknown); } +TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacros) { + // '__attribute__' has special handling. + auto Tokens = annotate("__attribute__(X) void Foo(void);"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::kw___attribute, TT_Unknown); + EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); + + // Generic macro has no special handling in this location. 
+ Tokens = annotate("A(X) void Foo(void);"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_Unknown); + + // Add a custom AttributeMacro. Test that it has the same behavior. + FormatStyle Style = getLLVMStyle(); + Style.AttributeMacros.push_back("A"); + + // An "AttributeMacro" gets annotated like '__attribute__'. + Tokens = annotate("A(X) void Foo(void);", Style); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::identifier, TT_AttributeMacro); + EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); +} + +TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacrosOnObjCDecl) { + // '__attribute__' has special handling. + auto Tokens = annotate("__attribute__(X) @interface Foo"); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::kw___attribute, TT_Unknown); + EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); + + // Generic macro has no special handling in this location. + Tokens = annotate("A(X) @interface Foo"); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + // Note: Don't check token-type as a random token in this position is hard to + // reason about. + EXPECT_TOKEN_KIND(Tokens[0], tok::identifier); + EXPECT_TOKEN_KIND(Tokens[1], tok::l_paren); + + // Add a custom AttributeMacro. Test that it has the same behavior. + FormatStyle Style = getLLVMStyle(); + Style.AttributeMacros.push_back("A"); + + // An "AttributeMacro" gets annotated like '__attribute__'. 
+ Tokens = annotate("A(X) @interface Foo", Style); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::identifier, TT_AttributeMacro); + EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_AttributeRParen); +} + +TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacrosOnObjCMethodDecl) { + // '__attribute__' has special handling. + auto Tokens = annotate("- (id)init __attribute__(X);"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::kw___attribute, TT_Unknown); + EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_AttributeRParen); + + // Generic macro has no special handling in this location. + Tokens = annotate("- (id)init A(X);"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + // Note: Don't check token-type as a random token in this position is hard to + // reason about. + EXPECT_TOKEN_KIND(Tokens[5], tok::identifier); + EXPECT_TOKEN_KIND(Tokens[6], tok::l_paren); + + // Add a custom AttributeMacro. Test that it has the same behavior. + FormatStyle Style = getLLVMStyle(); + Style.AttributeMacros.push_back("A"); + + // An "AttributeMacro" gets annotated like '__attribute__'. + Tokens = annotate("- (id)init A(X);", Style); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::identifier, TT_AttributeMacro); + EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_AttributeRParen); +} + +TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacrosOnObjCProperty) { + // '__attribute__' has special handling. + auto Tokens = annotate("@property(weak) id delegate __attribute__(X);"); + ASSERT_EQ(Tokens.size(), 13u) << Tokens; + EXPECT_TOKEN(Tokens[7], tok::kw___attribute, TT_Unknown); + EXPECT_TOKEN(Tokens[8], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_AttributeRParen); + + // Generic macro has no special handling in this location. 
+ Tokens = annotate("@property(weak) id delegate A(X);"); + ASSERT_EQ(Tokens.size(), 13u) << Tokens; + // Note: Don't check token-type as a random token in this position is hard to + // reason about. + EXPECT_TOKEN_KIND(Tokens[7], tok::identifier); + EXPECT_TOKEN_KIND(Tokens[8], tok::l_paren); + + // Add a custom AttributeMacro. Test that it has the same behavior. + FormatStyle Style = getLLVMStyle(); + Style.AttributeMacros.push_back("A"); + + // An "AttributeMacro" gets annotated like '__attribute__'. + Tokens = annotate("@property(weak) id delegate A(X);", Style); + ASSERT_EQ(Tokens.size(), 13u) << Tokens; + EXPECT_TOKEN(Tokens[7], tok::identifier, TT_AttributeMacro); + EXPECT_TOKEN(Tokens[8], tok::l_paren, TT_AttributeLParen); + EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_AttributeRParen); +} + TEST_F(TokenAnnotatorTest, UnderstandsVerilogOperators) { auto Annotate = [this](llvm::StringRef Code) { return annotate(Code, getLLVMStyle(FormatStyle::LK_Verilog)); From fd84b1a99dfe37d4212be8afba2a93209679bc7f Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Tue, 2 May 2023 10:56:41 -0700 Subject: [PATCH 174/720] [M68k] Add new calling convention M68k_RTD `M68k_RTD` is really similar to X86's stdcall, in which callee pops the arguments from stack. In LLVM IR it can be written as `m68k_rtdcc`. This patch also improves how ExpandPseudo Pass handles popping stack at function returns in the absent of the RTD instruction. 
Differential Revision: https://reviews.llvm.org/D149864 --- llvm/include/llvm/AsmParser/LLToken.h | 1 + llvm/include/llvm/IR/CallingConv.h | 3 ++ llvm/lib/AsmParser/LLLexer.cpp | 1 + llvm/lib/AsmParser/LLParser.cpp | 2 + llvm/lib/IR/AsmWriter.cpp | 1 + llvm/lib/Target/M68k/M68kExpandPseudo.cpp | 36 ++++++----------- llvm/lib/Target/M68k/M68kISelLowering.cpp | 5 +-- llvm/test/CodeGen/M68k/CConv/rtd-call.ll | 48 +++++++++++++++++++++++ llvm/test/CodeGen/M68k/CConv/rtd-ret.ll | 31 +++++++++++++++ 9 files changed, 102 insertions(+), 26 deletions(-) create mode 100644 llvm/test/CodeGen/M68k/CConv/rtd-call.ll create mode 100644 llvm/test/CodeGen/M68k/CConv/rtd-ret.ll diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 673dc58ce6451..2d6b8a19401d7 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -175,6 +175,7 @@ enum Kind { kw_amdgpu_kernel, kw_amdgpu_gfx, kw_tailcc, + kw_m68k_rtdcc, // Attributes: kw_attributes, diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h index e97623b29f523..40222fa31d978 100644 --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -245,6 +245,9 @@ namespace CallingConv { /// placement. Preserves active lane values for input VGPRs. AMDGPU_CS_ChainPreserve = 105, + /// Used for M68k rtd-based CC (similar to X86's stdcall). + M68k_RTD = 106, + /// The highest possible ID. Must be some 2^k - 1. 
MaxID = 1023 }; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 466bdebc001f5..1402c152bb5c3 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -632,6 +632,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(amdgpu_kernel); KEYWORD(amdgpu_gfx); KEYWORD(tailcc); + KEYWORD(m68k_rtdcc); KEYWORD(cc); KEYWORD(c); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 04eabc94cfc6a..e104f8b3d1fdb 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1999,6 +1999,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'amdgpu_cs_chain_preserve' /// ::= 'amdgpu_kernel' /// ::= 'tailcc' +/// ::= 'm68k_rtdcc' /// ::= 'cc' UINT /// bool LLParser::parseOptionalCallingConv(unsigned &CC) { @@ -2067,6 +2068,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) { break; case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break; case lltok::kw_tailcc: CC = CallingConv::Tail; break; + case lltok::kw_m68k_rtdcc: CC = CallingConv::M68k_RTD; break; case lltok::kw_cc: { Lex.Lex(); return parseUInt32(CC); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index e190d82127908..bd8b3e9ad5221 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -350,6 +350,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { break; case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break; case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break; + case CallingConv::M68k_RTD: Out << "m68k_rtdcc"; break; } } diff --git a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp index 2f60fc834a18e..13268d754a9dd 100644 --- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp +++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp @@ -258,32 +258,22 @@ bool M68kExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (StackAdj == 0) { MIB = BuildMI(MBB, MBBI, DL, TII->get(M68k::RTS)); - } 
else if (isUInt<16>(StackAdj)) { - - if (STI->atLeastM68020()) { - llvm_unreachable("RTD is not implemented"); - } else { - // Copy PC from stack to a free address(A0 or A1) register - // TODO check if pseudo expand uses free address register - BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32aj), M68k::A1) - .addReg(M68k::SP); + } else { + // Copy return address from stack to a free address(A0 or A1) register + // TODO check if pseudo expand uses free address register + BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32aj), M68k::A1) + .addReg(M68k::SP); - // Adjust SP - FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true); + // Adjust SP + FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true); - // Put the return address on stack - BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32ja)) - .addReg(M68k::SP) - .addReg(M68k::A1); + // Put the return address on stack + BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32ja)) + .addReg(M68k::SP) + .addReg(M68k::A1); - // RTS - BuildMI(MBB, MBBI, DL, TII->get(M68k::RTS)); - } - } else { - // TODO: RTD can only handle immediates as big as 2**16-1. - // If we need to pop off bytes before the return address, we - // must do it manually. - llvm_unreachable("Stack adjustment size not supported"); + // RTS + BuildMI(MBB, MBBI, DL, TII->get(M68k::RTS)); } // FIXME: Can rest of the operands be ignored, if there is any? diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp index d1ed26457fbcf..0830cc7feb220 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -3050,9 +3050,8 @@ M68kTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, /// Determines whether the callee is required to pop its own arguments. /// Callee pop is necessary to support tail calls. 
-bool M68k::isCalleePop(CallingConv::ID CallingConv, bool IsVarArg, - bool GuaranteeTCO) { - return false; +bool M68k::isCalleePop(CallingConv::ID CC, bool IsVarArg, bool GuaranteeTCO) { + return CC == CallingConv::M68k_RTD && !IsVarArg; } // Return true if it is OK for this CMOV pseudo-opcode to be cascaded diff --git a/llvm/test/CodeGen/M68k/CConv/rtd-call.ll b/llvm/test/CodeGen/M68k/CConv/rtd-call.ll new file mode 100644 index 0000000000000..56f36efbe0fb9 --- /dev/null +++ b/llvm/test/CodeGen/M68k/CConv/rtd-call.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=m68k %s -stop-after=finalize-isel -o - | FileCheck %s + +; We want to make sure caller doesn't pop the stack for callees using +; the M68k_RTD CC. However, we've implemented some frame optimization +; techniques to eliminate as many as frame setup/destroy instructions. +; Therefore, to make test case small and concise, we check the MIR generated +; after ISel instead. + +declare dso_local m68k_rtdcc void @callee(i32 noundef) +declare dso_local m68k_rtdcc void @va_callee(i32 noundef, ...) 
+ +define dso_local i32 @caller(ptr noundef %y) { + ; CHECK-LABEL: name: caller + ; CHECK: bb.0.entry: + ; CHECK-NEXT: [[MOV32rp:%[0-9]+]]:ar32 = MOV32rp 0, %fixed-stack.0, implicit-def dead $ccr :: (load (s32) from %fixed-stack.0, align 8) + ; CHECK-NEXT: [[MOV32rj:%[0-9]+]]:xr32 = MOV32rj killed [[MOV32rp]], implicit-def dead $ccr :: (load (s32) from %ir.y) + ; CHECK-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def dead $sp, implicit-def dead $ccr, implicit $sp + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ar32 = COPY $sp + ; CHECK-NEXT: MOV32jr [[COPY]], [[MOV32rj]], implicit-def dead $ccr :: (store (s32) into stack, align 2) + ; CHECK-NEXT: CALLb @callee, csr_std, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 4, 4, implicit-def dead $sp, implicit-def dead $ccr, implicit $sp + ; CHECK-NEXT: $d0 = COPY [[MOV32rj]] + ; CHECK-NEXT: RET 0, $d0 +entry: + %0 = load i32, ptr %y, align 4 + call m68k_rtdcc void @callee(i32 noundef %0) + ret i32 %0 +} + +define dso_local i32 @va_caller(ptr noundef %y) { + ; CHECK-LABEL: name: va_caller + ; CHECK: bb.0.entry: + ; CHECK-NEXT: [[MOV32rp:%[0-9]+]]:ar32 = MOV32rp 0, %fixed-stack.0, implicit-def dead $ccr :: (load (s32) from %fixed-stack.0, align 8) + ; CHECK-NEXT: [[MOV32rj:%[0-9]+]]:xr32 = MOV32rj killed [[MOV32rp]], implicit-def dead $ccr :: (load (s32) from %ir.y) + ; CHECK-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def dead $sp, implicit-def dead $ccr, implicit $sp + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ar32 = COPY $sp + ; CHECK-NEXT: MOV32jr [[COPY]], [[MOV32rj]], implicit-def dead $ccr :: (store (s32) into stack, align 2) + ; CHECK-NEXT: CALLb @va_callee, csr_std, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 4, 0, implicit-def dead $sp, implicit-def dead $ccr, implicit $sp + ; CHECK-NEXT: $d0 = COPY [[MOV32rj]] + ; CHECK-NEXT: RET 0, $d0 +entry: + %0 = load i32, ptr %y, align 4 + call m68k_rtdcc void (i32, ...) 
@va_callee(i32 noundef %0) + ret i32 %0 +} + diff --git a/llvm/test/CodeGen/M68k/CConv/rtd-ret.ll b/llvm/test/CodeGen/M68k/CConv/rtd-ret.ll new file mode 100644 index 0000000000000..2dc5f2812fcea --- /dev/null +++ b/llvm/test/CodeGen/M68k/CConv/rtd-ret.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=m68k < %s | FileCheck %s + +define dso_local m68k_rtdcc i32 @ret(i32 noundef %a, i32 noundef %b, i32 noundef %c) nounwind { +; CHECK-LABEL: ret: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: move.l (8,%sp), %d0 +; CHECK-NEXT: add.l (4,%sp), %d0 +; CHECK-NEXT: add.l (12,%sp), %d0 +; CHECK-NEXT: move.l (%sp), %a1 +; CHECK-NEXT: adda.l #12, %sp +; CHECK-NEXT: move.l %a1, (%sp) +; CHECK-NEXT: rts +entry: + %add = add nsw i32 %b, %a + %add1 = add nsw i32 %add, %c + ret i32 %add1 +} + +define dso_local m68k_rtdcc i32 @va_ret(i32 noundef %a, i32 noundef %b, i32 noundef %c, ...) nounwind { +; CHECK-LABEL: va_ret: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: move.l (8,%sp), %d0 +; CHECK-NEXT: add.l (4,%sp), %d0 +; CHECK-NEXT: add.l (12,%sp), %d0 +; CHECK-NEXT: rts +entry: + %add = add nsw i32 %b, %a + %add1 = add nsw i32 %add, %c + ret i32 %add1 +} From 42b707e5b438be538e3560429d0b4afcd7ca05be Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Mon, 12 Jun 2023 10:19:39 -0700 Subject: [PATCH 175/720] [DWARF][M68k] Add new DW_CC for the new M68kRTD calling convention Add `DW_CC_M68kRTD` to model the new `llvm::CallingConv::M68kRTD`. 
Differential Revision: https://reviews.llvm.org/D152587 --- llvm/include/llvm/BinaryFormat/Dwarf.def | 1 + llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp | 3 +++ .../test/DebugInfo/M68k/calling-convention.ll | 21 +++++++++++++++++++ llvm/test/DebugInfo/M68k/lit.local.cfg | 2 ++ 4 files changed, 27 insertions(+) create mode 100644 llvm/test/DebugInfo/M68k/calling-convention.ll create mode 100644 llvm/test/DebugInfo/M68k/lit.local.cfg diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index 40d958c867de9..fb328a0257732 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -1036,6 +1036,7 @@ HANDLE_DW_CC(0xc8, LLVM_Swift) HANDLE_DW_CC(0xc9, LLVM_PreserveMost) HANDLE_DW_CC(0xca, LLVM_PreserveAll) HANDLE_DW_CC(0xcb, LLVM_X86RegCall) +HANDLE_DW_CC(0xcc, LLVM_M68kRTD) // From GCC source code (include/dwarf2.h): This DW_CC_ value is not currently // generated by any toolchain. It is used internally to GDB to indicate OpenCL // C functions that have been compiled with the IBM XL C for OpenCL compiler and diff --git a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp index c474de6076265..5a5ac28f18221 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp @@ -620,6 +620,9 @@ void DWARFTypePrinter::appendSubroutineNameAfter( case CallingConvention::DW_CC_LLVM_X86RegCall: OS << " __attribute__((regcall))"; break; + case CallingConvention::DW_CC_LLVM_M68kRTD: + OS << " __attribute__((m68k_rtd))"; + break; } } diff --git a/llvm/test/DebugInfo/M68k/calling-convention.ll b/llvm/test/DebugInfo/M68k/calling-convention.ll new file mode 100644 index 0000000000000..c6e8049771e42 --- /dev/null +++ b/llvm/test/DebugInfo/M68k/calling-convention.ll @@ -0,0 +1,21 @@ +; RUN: llc --mtriple=m68k -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -v %t | FileCheck %s + +; CHECK-LABEL: DW_TAG_subprogram +; 
CHECK: DW_AT_calling_convention [DW_FORM_data1] (DW_CC_LLVM_M68kRTD) +define m68k_rtdcc void @foo() !dbg !3 { +entry: + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 17.0.0 (https://github.com/llvm/llvm-project.git)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "foo.c", directory: "/path/to/file") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "foo", scope: !4, file: !4, line: 4, type: !5, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !7) +!4 = !DIFile(filename: "./foo.c", directory: "/path/to/file") +!5 = !DISubroutineType(cc: DW_CC_LLVM_M68kRTD, types: !6) +!6 = !{null} +!7 = !{} diff --git a/llvm/test/DebugInfo/M68k/lit.local.cfg b/llvm/test/DebugInfo/M68k/lit.local.cfg new file mode 100644 index 0000000000000..dd33fe312cdd2 --- /dev/null +++ b/llvm/test/DebugInfo/M68k/lit.local.cfg @@ -0,0 +1,2 @@ +if not "M68k" in config.root.targets: + config.unsupported = True From fd4f96290ac99bf8b9284d3b32743cac0bb135ea Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Tue, 22 Aug 2023 23:13:54 -0700 Subject: [PATCH 176/720] [Clang][M68k] Add Clang support for the new M68k_RTD CC This patch adds `CC_M68kRTD`, which will be used on function if either `__attribute__((m68k_rtd))` is presented or `-mrtd` flag is given. 
Differential Revision: https://reviews.llvm.org/D149867 --- clang/docs/ReleaseNotes.rst | 4 ++ clang/include/clang-c/Index.h | 1 + clang/include/clang/Basic/Attr.td | 5 ++ clang/include/clang/Basic/AttrDocs.td | 12 +++++ clang/include/clang/Basic/LangOptions.h | 3 +- clang/include/clang/Basic/Specifiers.h | 2 + clang/include/clang/Driver/Options.td | 4 +- clang/lib/AST/ASTContext.cpp | 4 ++ clang/lib/AST/ItaniumMangle.cpp | 1 + clang/lib/AST/Type.cpp | 2 + clang/lib/AST/TypePrinter.cpp | 6 +++ clang/lib/Basic/Targets/M68k.cpp | 10 ++++ clang/lib/Basic/Targets/M68k.h | 1 + clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGDebugInfo.cpp | 2 + clang/lib/Driver/ToolChains/Clang.cpp | 8 +++- clang/lib/Frontend/CompilerInvocation.cpp | 13 ++++-- clang/lib/Sema/SemaDeclAttr.cpp | 7 +++ clang/lib/Sema/SemaType.cpp | 5 +- clang/test/CodeGen/mrtd.c | 25 ++++++---- .../test/CodeGenCXX/default_calling_conv.cpp | 35 +++++++++----- clang/test/CodeGenCXX/m68k-rtdcall.cpp | 16 +++++++ clang/test/Sema/m68k-rtdcall.c | 46 +++++++++++++++++++ clang/test/SemaCXX/m68k-rtdcall.cpp | 14 ++++++ clang/tools/libclang/CXType.cpp | 1 + 25 files changed, 200 insertions(+), 31 deletions(-) create mode 100644 clang/test/CodeGenCXX/m68k-rtdcall.cpp create mode 100644 clang/test/Sema/m68k-rtdcall.c create mode 100644 clang/test/SemaCXX/m68k-rtdcall.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index be7c8bf247f7a..6d315e9f84ddf 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -205,6 +205,10 @@ Modified Compiler Flags * ``-frewrite-includes`` now guards the original #include directives with ``__CLANG_REWRITTEN_INCLUDES``, and ``__CLANG_REWRITTEN_SYSTEM_INCLUDES`` as appropriate. +* Introducing a new default calling convention for ``-fdefault-calling-conv``: + ``rtdcall``. This new default CC only works for M68k and will use the new + ``m68k_rtdcc`` CC on every functions that are not variadic. 
The ``-mrtd`` + driver/frontend flag has the same effect when targeting M68k. Removed Compiler Flags ------------------------- diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 1b91feabd584c..64ab3378957c7 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2980,6 +2980,7 @@ enum CXCallingConv { CXCallingConv_AArch64VectorCall = 16, CXCallingConv_SwiftAsync = 17, CXCallingConv_AArch64SVEPCS = 18, + CXCallingConv_M68kRTD = 19, CXCallingConv_Invalid = 100, CXCallingConv_Unexposed = 200 diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 5c9eb7b8a9810..5486b36133755 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2805,6 +2805,11 @@ def PreserveAll : DeclOrTypeAttr { let Documentation = [PreserveAllDocs]; } +def M68kRTD: DeclOrTypeAttr { + let Spellings = [Clang<"m68k_rtd">]; + let Documentation = [M68kRTDDocs]; +} + def Target : InheritableAttr { let Spellings = [GCC<"target">]; let Args = [StringArgument<"featuresStr">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 9f9991bdae361..cbbf69faeb308 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2825,6 +2825,18 @@ See the documentation for `__vectorcall`_ on MSDN for more details. }]; } +def M68kRTDDocs : Documentation { + let Category = DocCatCallingConvs; + let Content = [{ +On M68k targets, this attribute changes the calling convention of a function +to clear parameters off the stack on return. In other words, callee is +responsible for cleaning out the stack space allocated for incoming paramters. +This convention does not support variadic calls or unprototyped functions in C. +When targeting M68010 or newer CPUs, this calling convention is implemented +using the `rtd` instruction. 
+ }]; +} + def DocCatConsumed : DocumentationCategory<"Consumed Annotation Checking"> { let Content = [{ Clang supports additional attributes for checking basic resource management diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index e0e95f6d26f45..20a8ada60e0fe 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -134,7 +134,8 @@ class LangOptions : public LangOptionsBase { DCC_FastCall, DCC_StdCall, DCC_VectorCall, - DCC_RegCall + DCC_RegCall, + DCC_RtdCall }; enum AddrSpaceMapMangling { ASMM_Target, ASMM_On, ASMM_Off }; diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 6ae56703eca41..0add24d53b21e 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -288,6 +288,7 @@ namespace clang { CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) + CC_M68kRTD, // __attribute__((m68k_rtd)) }; /// Checks whether the given calling convention supports variadic @@ -304,6 +305,7 @@ namespace clang { case CC_OpenCLKernel: case CC_Swift: case CC_SwiftAsync: + case CC_M68kRTD: return false; default: return true; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3f2058a5d4650..54afd652ad3d0 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7478,9 +7478,9 @@ def fnative_half_arguments_and_returns : Flag<["-"], "fnative-half-arguments-and ImpliedByAnyOf<[open_cl.KeyPath, render_script.KeyPath, hlsl.KeyPath]>; def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">, HelpText<"Set default calling convention">, - Values<"cdecl,fastcall,stdcall,vectorcall,regcall">, + Values<"cdecl,fastcall,stdcall,vectorcall,regcall,rtdcall">, NormalizedValuesScope<"LangOptions">, - 
NormalizedValues<["DCC_CDecl", "DCC_FastCall", "DCC_StdCall", "DCC_VectorCall", "DCC_RegCall"]>, + NormalizedValues<["DCC_CDecl", "DCC_FastCall", "DCC_StdCall", "DCC_VectorCall", "DCC_RegCall", "DCC_RtdCall"]>, MarshallingInfoEnum, "DCC_None">; // These options cannot be marshalled, because they are used to set up the LangOptions defaults. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index cdc3d62bca008..4c4bcbf8a68f7 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12024,6 +12024,10 @@ CallingConv ASTContext::getDefaultCallingConvention(bool IsVariadic, if (!IsVariadic) return CC_X86RegCall; break; + case LangOptions::DCC_RtdCall: + if (!IsVariadic) + return CC_M68kRTD; + break; } } return Target->getDefaultCallingConv(); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 23ec35cae4b7b..8862f4d4fbd7b 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3429,6 +3429,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) { case CC_OpenCLKernel: case CC_PreserveMost: case CC_PreserveAll: + case CC_M68kRTD: // FIXME: we should be mangling all of the above. 
return ""; diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 282298971705b..570d460edbda0 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3373,6 +3373,7 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) { case CC_SwiftAsync: return "swiftasynccall"; case CC_PreserveMost: return "preserve_most"; case CC_PreserveAll: return "preserve_all"; + case CC_M68kRTD: return "m68k_rtd"; } llvm_unreachable("Invalid calling convention."); @@ -3852,6 +3853,7 @@ bool AttributedType::isCallingConv() const { case attr::IntelOclBicc: case attr::PreserveMost: case attr::PreserveAll: + case attr::M68kRTD: return true; } llvm_unreachable("invalid attr kind"); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index eb69d0bb8755b..b9f6c0eeb450d 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1044,6 +1044,9 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, case CC_PreserveAll: OS << " __attribute__((preserve_all))"; break; + case CC_M68kRTD: + OS << " __attribute__((m68k_rtd))"; + break; } } @@ -1879,6 +1882,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, case attr::PreserveAll: OS << "preserve_all"; break; + case attr::M68kRTD: + OS << "m68k_rtd"; + break; case attr::NoDeref: OS << "noderef"; break; diff --git a/clang/lib/Basic/Targets/M68k.cpp b/clang/lib/Basic/Targets/M68k.cpp index 3c6274f89dab1..1b7e0a7f32c9b 100644 --- a/clang/lib/Basic/Targets/M68k.cpp +++ b/clang/lib/Basic/Targets/M68k.cpp @@ -238,5 +238,15 @@ TargetInfo::BuiltinVaListKind M68kTargetInfo::getBuiltinVaListKind() const { return TargetInfo::VoidPtrBuiltinVaList; } +TargetInfo::CallingConvCheckResult +M68kTargetInfo::checkCallingConvention(CallingConv CC) const { + switch (CC) { + case CC_C: + case CC_M68kRTD: + return CCCR_OK; + default: + return TargetInfo::checkCallingConvention(CC); + } +} } // namespace targets } // namespace clang diff --git 
a/clang/lib/Basic/Targets/M68k.h b/clang/lib/Basic/Targets/M68k.h index 1af00115a5059..a9c262e62fbad 100644 --- a/clang/lib/Basic/Targets/M68k.h +++ b/clang/lib/Basic/Targets/M68k.h @@ -54,6 +54,7 @@ class LLVM_LIBRARY_VISIBILITY M68kTargetInfo : public TargetInfo { std::string_view getClobbers() const override; BuiltinVaListKind getBuiltinVaListKind() const override; bool setCPU(const std::string &Name) override; + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override; }; } // namespace targets diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 93e16575042c4..150450e916590 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -72,6 +72,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_PreserveAll: return llvm::CallingConv::PreserveAll; case CC_Swift: return llvm::CallingConv::Swift; case CC_SwiftAsync: return llvm::CallingConv::SwiftTail; + case CC_M68kRTD: return llvm::CallingConv::M68k_RTD; } } @@ -252,6 +253,9 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D, if (D->hasAttr()) return CC_PreserveAll; + if (D->hasAttr()) + return CC_M68kRTD; + return CC_C; } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index c73a63e12f03a..c430713b0d77d 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1445,6 +1445,8 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_LLVM_PreserveAll; case CC_X86RegCall: return llvm::dwarf::DW_CC_LLVM_X86RegCall; + case CC_M68kRTD: + return llvm::dwarf::DW_CC_LLVM_M68kRTD; } return 0; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b91126ebed018..94c184435ae14 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5466,8 +5466,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } } - if 
(Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false)) - CmdArgs.push_back("-fdefault-calling-conv=stdcall"); + if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false)) { + if (Triple.getArch() == llvm::Triple::m68k) + CmdArgs.push_back("-fdefault-calling-conv=rtdcall"); + else + CmdArgs.push_back("-fdefault-calling-conv=stdcall"); + } if (Args.hasArg(options::OPT_fenable_matrix)) { // enable-matrix is needed by both the LangOpts and by LLVM. diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index bb442495f5835..4e6d7bb16f51b 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -648,6 +648,7 @@ static bool FixupInvocation(CompilerInvocation &Invocation, emitError |= (DefaultCC == LangOptions::DCC_VectorCall || DefaultCC == LangOptions::DCC_RegCall) && !T.isX86(); + emitError |= DefaultCC == LangOptions::DCC_RtdCall && Arch != llvm::Triple::m68k; if (emitError) Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getSpelling() << T.getTriple(); @@ -3865,11 +3866,17 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getSpelling() << "-fdefault-calling-conv"; else { - if (T.getArch() != llvm::Triple::x86) + switch (T.getArch()) { + case llvm::Triple::x86: + Opts.setDefaultCallingConv(LangOptions::DCC_StdCall); + break; + case llvm::Triple::m68k: + Opts.setDefaultCallingConv(LangOptions::DCC_RtdCall); + break; + default: Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getSpelling() << T.getTriple(); - else - Opts.setDefaultCallingConv(LangOptions::DCC_StdCall); + } } } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index feb02cad9080e..5adf058bea56a 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5211,6 +5211,9 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr 
&AL) { case ParsedAttr::AT_PreserveAll: D->addAttr(::new (S.Context) PreserveAllAttr(S.Context, AL)); return; + case ParsedAttr::AT_M68kRTD: + D->addAttr(::new (S.Context) M68kRTDAttr(S.Context, AL)); + return; default: llvm_unreachable("unexpected attribute kind"); } @@ -5408,6 +5411,9 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, case ParsedAttr::AT_PreserveAll: CC = CC_PreserveAll; break; + case ParsedAttr::AT_M68kRTD: + CC = CC_M68kRTD; + break; default: llvm_unreachable("unexpected attribute kind"); } @@ -9353,6 +9359,7 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_AArch64VectorPcs: case ParsedAttr::AT_AArch64SVEPcs: case ParsedAttr::AT_AMDGPUKernelCall: + case ParsedAttr::AT_M68kRTD: handleCallConvAttr(S, D, AL); break; case ParsedAttr::AT_Suppress: diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 068971f8130a4..2182fa6f7550c 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -136,7 +136,8 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_Pcs: \ case ParsedAttr::AT_IntelOclBicc: \ case ParsedAttr::AT_PreserveMost: \ - case ParsedAttr::AT_PreserveAll + case ParsedAttr::AT_PreserveAll: \ + case ParsedAttr::AT_M68kRTD // Function type attributes. 
#define FUNCTION_TYPE_ATTRS_CASELIST \ @@ -7802,6 +7803,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) { return createSimpleAttr(Ctx, Attr); case ParsedAttr::AT_PreserveAll: return createSimpleAttr(Ctx, Attr); + case ParsedAttr::AT_M68kRTD: + return createSimpleAttr(Ctx, Attr); } llvm_unreachable("unexpected attribute kind!"); } diff --git a/clang/test/CodeGen/mrtd.c b/clang/test/CodeGen/mrtd.c index e615cdfa674bc..c37a9ea95d02a 100644 --- a/clang/test/CodeGen/mrtd.c +++ b/clang/test/CodeGen/mrtd.c @@ -1,20 +1,24 @@ -// RUN: %clang_cc1 -mrtd -triple i386-unknown-unknown -std=c89 -emit-llvm -o - %s 2>&1 | FileCheck %s - -// CHECK: mrtd.c:10:3: warning: function with no prototype cannot use the stdcall calling convention +// RUN: %clang_cc1 -mrtd -triple i386-unknown-unknown -std=c89 -emit-llvm -o - %s 2>&1 | FileCheck --check-prefixes=CHECK,X86 %s +// RUN: %clang_cc1 -mrtd -triple m68k-unknown-unknown -std=c89 -emit-llvm -o - %s 2>&1 | FileCheck --check-prefixes=CHECK,M68K %s void baz(int arg); -// CHECK: define{{.*}} x86_stdcallcc void @foo(i32 noundef %arg) [[NUW:#[0-9]+]] +// X86: define{{.*}} x86_stdcallcc void @foo(i32 noundef %arg) [[NUW:#[0-9]+]] +// M68K: define{{.*}} m68k_rtdcc void @foo(i32 noundef %arg) void foo(int arg) { -// CHECK: call x86_stdcallcc i32 @bar( +// X86: call x86_stdcallcc i32 @bar( +#ifndef __mc68000__ bar(arg); -// CHECK: call x86_stdcallcc void @baz(i32 +#endif +// X86: call x86_stdcallcc void @baz(i32 +// M68K: call m68k_rtdcc void @baz(i32 baz(arg); } -// CHECK: declare x86_stdcallcc i32 @bar(...) +// X86: declare x86_stdcallcc i32 @bar(...) -// CHECK: declare x86_stdcallcc void @baz(i32 noundef) +// X86: declare x86_stdcallcc void @baz(i32 noundef) +// M68K: declare m68k_rtdcc void @baz(i32 noundef) void qux(int arg, ...) { } // CHECK: define{{.*}} void @qux(i32 noundef %arg, ...) @@ -22,7 +26,8 @@ void qux(int arg, ...) 
{ } void quux(int a1, int a2, int a3) { qux(a1, a2, a3); } -// CHECK-LABEL: define{{.*}} x86_stdcallcc void @quux +// X86-LABEL: define{{.*}} x86_stdcallcc void @quux +// M68K-LABEL: define{{.*}} m68k_rtdcc void @quux // CHECK: call void (i32, ...) @qux -// CHECK: attributes [[NUW]] = { noinline nounwind{{.*}} } +// X86: attributes [[NUW]] = { noinline nounwind{{.*}} } diff --git a/clang/test/CodeGenCXX/default_calling_conv.cpp b/clang/test/CodeGenCXX/default_calling_conv.cpp index 0991f862b881d..ff81f3712116d 100644 --- a/clang/test/CodeGenCXX/default_calling_conv.cpp +++ b/clang/test/CodeGenCXX/default_calling_conv.cpp @@ -1,43 +1,50 @@ -// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -fdefault-calling-conv=cdecl -emit-llvm -o - %s | FileCheck %s --check-prefix=CDECL --check-prefix=ALL -// RUN: %clang_cc1 -triple i786-unknown-linux-gnu -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s | FileCheck %s --check-prefix=FASTCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -fdefault-calling-conv=stdcall -emit-llvm -o - %s | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -mrtd -emit-llvm -o - %s | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=vectorcall -emit-llvm -o - %s | FileCheck %s --check-prefix=VECTORCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=regcall -emit-llvm -o - %s | FileCheck %s --check-prefix=REGCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -fdefault-calling-conv=cdecl -emit-llvm -o - %s | FileCheck %s --check-prefix=CDECL --check-prefix=X86 --check-prefix=ALL +// RUN: %clang_cc1 -triple i786-unknown-linux-gnu -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s | FileCheck %s --check-prefix=FASTCALL --check-prefix=X86 --check-prefix=ALL +// RUN: %clang_cc1 
-triple i486-unknown-linux-gnu -fdefault-calling-conv=stdcall -emit-llvm -o - %s | FileCheck %s --check-prefix=STDCALL --check-prefix=X86 --check-prefix=ALL +// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -mrtd -emit-llvm -o - %s | FileCheck %s --check-prefix=STDCALL --check-prefix=X86 --check-prefix=ALL +// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=vectorcall -emit-llvm -o - %s | FileCheck %s --check-prefix=VECTORCALL --check-prefix=X86 --check-prefix=ALL +// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=regcall -emit-llvm -o - %s | FileCheck %s --check-prefix=REGCALL --check-prefix=X86 --check-prefix=ALL // RUN: %clang_cc1 -triple i686-pc-win32 -fdefault-calling-conv=vectorcall -emit-llvm -o - %s -DWINDOWS | FileCheck %s --check-prefix=WIN32 // RUN: %clang_cc1 -triple x86_64-windows-msvc -fdefault-calling-conv=vectorcall -emit-llvm -o - %s -DWINDOWS | FileCheck %s --check-prefix=WIN64 // RUN: %clang_cc1 -triple i686-pc-win32 -emit-llvm -o - %s -DEXPLICITCC | FileCheck %s --check-prefix=EXPLICITCC +// RUN: %clang_cc1 -triple m68k-unknown-linux-gnu -mrtd -emit-llvm -o - %s | FileCheck %s --check-prefix=RTDCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple m68k-unknown-linux-gnu -fdefault-calling-conv=rtdcall -emit-llvm -o - %s | FileCheck %s --check-prefix=RTDCALL --check-prefix=ALL // CDECL: define{{.*}} void @_Z5test1v // FASTCALL: define{{.*}} x86_fastcallcc void @_Z5test1v // STDCALL: define{{.*}} x86_stdcallcc void @_Z5test1v // VECTORCALL: define{{.*}} x86_vectorcallcc void @_Z5test1v // REGCALL: define{{.*}} x86_regcallcc void @_Z17__regcall3__test1v +// RTDCALL: define{{.*}} m68k_rtdcc void @_Z5test1v void test1() {} -// fastcall, stdcall, vectorcall and regcall do not support variadic functions. +// fastcall, stdcall, vectorcall, regcall and m68k_rtd do not support variadic functions. 
// CDECL: define{{.*}} void @_Z12testVariadicz // FASTCALL: define{{.*}} void @_Z12testVariadicz // STDCALL: define{{.*}} void @_Z12testVariadicz // VECTORCALL: define{{.*}} void @_Z12testVariadicz // REGCALL: define{{.*}} void @_Z12testVariadicz +// RTDCALL: define{{.*}} void @_Z12testVariadicz void testVariadic(...){} -// ALL: define{{.*}} void @_Z5test2v +// X86: define{{.*}} void @_Z5test2v void __attribute__((cdecl)) test2() {} -// ALL: define{{.*}} x86_fastcallcc void @_Z5test3v +// X86: define{{.*}} x86_fastcallcc void @_Z5test3v void __attribute__((fastcall)) test3() {} -// ALL: define{{.*}} x86_stdcallcc void @_Z5test4v +// X86: define{{.*}} x86_stdcallcc void @_Z5test4v void __attribute__((stdcall)) test4() {} -// ALL: define{{.*}} x86_vectorcallcc void @_Z5test5v +// X86: define{{.*}} x86_vectorcallcc void @_Z5test5v void __attribute__((vectorcall)) test5() {} -// ALL: define{{.*}} x86_regcallcc void @_Z17__regcall3__test6v +// X86: define{{.*}} x86_regcallcc void @_Z17__regcall3__test6v void __attribute__((regcall)) test6() {} +// RTDCALL: define{{.*}} m68k_rtdcc void @_Z5test7v +void __attribute__((m68k_rtd)) test7() {} + // ALL: define linkonce_odr void @_ZN1A11test_memberEv class A { public: @@ -47,6 +54,10 @@ class A { void test() { A a; a.test_member(); + +// ALL: define internal void @"_ZZ{{.*}}testvENK3$_0clEi" + auto f = [](int b) {}; + f(87); } // ALL: define{{.*}} i32 @main diff --git a/clang/test/CodeGenCXX/m68k-rtdcall.cpp b/clang/test/CodeGenCXX/m68k-rtdcall.cpp new file mode 100644 index 0000000000000..835649359ae15 --- /dev/null +++ b/clang/test/CodeGenCXX/m68k-rtdcall.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple m68k-linux-gnu -emit-llvm -o - %s | FileCheck %s + +class A { +public: +// CHECK: define{{.*}} m68k_rtdcc void @_ZN1A6memberEv + void __attribute__((m68k_rtd)) member() {} +}; + +void test() { + A a; + a.member(); + +// CHECK: define{{.*}} m68k_rtdcc void @"_ZZ4testvENK3$_0clEi" + auto f = [](int b) __attribute__((m68k_rtd)) 
{}; + f(87); +}; diff --git a/clang/test/Sema/m68k-rtdcall.c b/clang/test/Sema/m68k-rtdcall.c new file mode 100644 index 0000000000000..114af64aaa5a7 --- /dev/null +++ b/clang/test/Sema/m68k-rtdcall.c @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -triple m68k-unknown-unknown -mrtd -std=c89 -verify -verify=rtd %s +// RUN: %clang_cc1 -triple m68k-unknown-unknown -std=c89 -verify -verify=nortd %s + +// rtd-error@+2 {{function with no prototype cannot use the m68k_rtd calling convention}} +void foo(int arg) { + bar(arg); +} + +// nortd-note@+4 {{previous declaration is here}} +// nortd-error@+4 {{function declared 'm68k_rtd' here was previously declared without calling convention}} +// nortd-note@+4 {{previous declaration is here}} +// nortd-error@+4 {{function declared 'm68k_rtd' here was previously declared without calling convention}} +void nonvariadic1(int a, int b, int c); +void __attribute__((m68k_rtd)) nonvariadic1(int a, int b, int c); +void nonvariadic2(int a, int b, int c); +void __attribute__((m68k_rtd)) nonvariadic2(int a, int b, int c) { } + +// expected-error@+2 {{variadic function cannot use m68k_rtd calling convention}} +void variadic(int a, ...); +void __attribute__((m68k_rtd)) variadic(int a, ...); + +// rtd-note@+2 {{previous declaration is here}} +// rtd-error@+2 {{redeclaration of 'a' with a different type: 'void ((*))(int, int) __attribute__((cdecl))' vs 'void (*)(int, int) __attribute__((m68k_rtd))'}} +extern void (*a)(int, int); +__attribute__((cdecl)) extern void (*a)(int, int); + +extern void (*b)(int, ...); +__attribute__((cdecl)) extern void (*b)(int, ...); + +// nortd-note@+2 {{previous declaration is here}} +// nortd-error@+2 {{redeclaration of 'c' with a different type: 'void ((*))(int, int) __attribute__((m68k_rtd))' vs 'void (*)(int, int)'}} +extern void (*c)(int, int); +__attribute__((m68k_rtd)) extern void (*c)(int, int); + +// expected-error@+2 {{variadic function cannot use m68k_rtd calling convention}} +extern void (*d)(int, ...); 
+__attribute__((m68k_rtd)) extern void (*d)(int, ...); + +// expected-warning@+1 {{'m68k_rtd' only applies to function types; type here is 'int'}} +__attribute__((m68k_rtd)) static int g = 0; + +// expected-error@+1 {{'m68k_rtd' attribute takes no arguments}} +void __attribute__((m68k_rtd("invalid"))) z(int a); + +// expected-error@+1 {{function with no prototype cannot use the m68k_rtd calling convention}} +void __attribute__((m68k_rtd)) e(); diff --git a/clang/test/SemaCXX/m68k-rtdcall.cpp b/clang/test/SemaCXX/m68k-rtdcall.cpp new file mode 100644 index 0000000000000..31f4bceafd955 --- /dev/null +++ b/clang/test/SemaCXX/m68k-rtdcall.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple m68k-linux-gnu -fsyntax-only %s + +class A { +public: + void __attribute__((m68k_rtd)) member() {} +}; + +void test() { + A a; + a.member(); + + auto f = [](int b) __attribute__((m68k_rtd)) {}; + f(87); +}; diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp index eb8bfc25a7c91..3d620d3bfb260 100644 --- a/clang/tools/libclang/CXType.cpp +++ b/clang/tools/libclang/CXType.cpp @@ -678,6 +678,7 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) { TCALLINGCONV(SwiftAsync); TCALLINGCONV(PreserveMost); TCALLINGCONV(PreserveAll); + TCALLINGCONV(M68kRTD); case CC_SpirFunction: return CXCallingConv_Unexposed; case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed; case CC_OpenCLKernel: return CXCallingConv_Unexposed; From 3049ac44e638c1af5177dc923f5f0675e9213d2a Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Sun, 15 Oct 2023 19:37:54 -0400 Subject: [PATCH 177/720] [mlir][vector] Enable transfer op hoisting with dynamic indices (#68500) Recent changes (https://github.com/llvm/llvm-project/pull/66930) disabled vector transfer ops hoisting with view-like intermediate ops. The recommended way is to fold subview ops into transfer op indices before invoking hoisting. 
That would mean now we see transfer op indices involving dynamic values, instead of static constant values before with subview ops. Therefore hoisting won't kick in anymore. This breaks downstream users. To fix it, this commit enables hoisting transfer ops with dynamic indices by using `ValueBoundsConstraintSet` to prove ranges are disjoint in `isDisjointTransferIndices`. Given that utility is used in many places including op folders, right now we introduce a flag to it and only set as true for "heavy" transforms in hoisting and load-store forwarding. --- .../Affine/IR/ValueBoundsOpInterfaceImpl.h | 12 +- .../mlir/Dialect/Vector/IR/VectorOps.h | 19 ++- .../mlir/Interfaces/ValueBoundsOpInterface.h | 10 ++ .../Affine/IR/ValueBoundsOpInterfaceImpl.cpp | 9 +- .../Dialect/Linalg/Transforms/Hoisting.cpp | 12 +- mlir/lib/Dialect/Vector/IR/CMakeLists.txt | 2 + mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 65 +++++++-- .../Transforms/VectorTransferOpTransforms.cpp | 6 +- .../lib/Interfaces/ValueBoundsOpInterface.cpp | 27 ++-- mlir/test/Dialect/Linalg/hoisting.mlir | 132 ++++++++++++++++++ .../Dialect/Vector/vector-transferop-opt.mlir | 104 ++++++++++++++ .../Dialect/Affine/TestReifyValueBounds.cpp | 30 ++-- .../llvm-project-overlay/mlir/BUILD.bazel | 2 + 13 files changed, 370 insertions(+), 60 deletions(-) diff --git a/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h index 5d4774861bdfd..6e617ef40a53d 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h +++ b/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h @@ -18,16 +18,18 @@ class Value; namespace affine { void registerValueBoundsOpInterfaceExternalModels(DialectRegistry ®istry); -/// Compute whether the given values are equal. Return "failure" if equality -/// could not be determined. `value1`/`value2` must be index-typed. +/// Compute a constant delta of the given two values. 
Return "failure" if we +/// cannot determine a constant delta. `value1`/`value2` must be index-typed. /// -/// This function is similar to `ValueBoundsConstraintSet::areEqual`. To work -/// around limitations in `FlatLinearConstraints`, this function fully composes +/// This function is similar to +/// `ValueBoundsConstraintSet::computeConstantDistance`. To work around +/// limitations in `FlatLinearConstraints`, this function fully composes /// `value1` and `value2` (if they are the result of affine.apply ops) before /// populating the constraint set. The folding/composing logic can see /// opportunities for simplifications that the constraint set implementation /// cannot see. -FailureOr fullyComposeAndCheckIfEqual(Value value1, Value value2); +FailureOr fullyComposeAndComputeConstantDelta(Value value1, + Value value2); } // namespace affine } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h index fc0c80036ff79..9ab20e20d9754 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h @@ -105,16 +105,23 @@ bool checkSameValueRAW(TransferWriteOp defWrite, TransferReadOp read); /// op. bool checkSameValueWAW(TransferWriteOp write, TransferWriteOp priorWrite); -/// Same behavior as `isDisjointTransferSet` but doesn't require the operations -/// to have the same tensor/memref. This allows comparing operations accessing -/// different tensors. +/// Return true if we can prove that the transfer operations access disjoint +/// memory, without requring the accessed tensor/memref to be the same. +/// +/// If `testDynamicValueUsingBounds` is true, tries to test dynamic values +/// via ValueBoundsOpInterface. 
bool isDisjointTransferIndices(VectorTransferOpInterface transferA, - VectorTransferOpInterface transferB); + VectorTransferOpInterface transferB, + bool testDynamicValueUsingBounds = false); /// Return true if we can prove that the transfer operations access disjoint -/// memory. +/// memory, requiring the operations to access the same tensor/memref. +/// +/// If `testDynamicValueUsingBounds` is true, tries to test dynamic values +/// via ValueBoundsOpInterface. bool isDisjointTransferSet(VectorTransferOpInterface transferA, - VectorTransferOpInterface transferB); + VectorTransferOpInterface transferB, + bool testDynamicValueUsingBounds = false); /// Return the result value of reducing two scalar/vector values with the /// corresponding arith operation. diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h index 2687d79aec68e..8f11c563e0cbd 100644 --- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h +++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h @@ -176,6 +176,16 @@ class ValueBoundsConstraintSet { presburger::BoundType type, AffineMap map, ValueDimList mapOperands, StopConditionFn stopCondition = nullptr, bool closedUB = false); + /// Compute a constant delta between the given two values. Return "failure" + /// if a constant delta could not be determined. + /// + /// `dim1`/`dim2` must be `nullopt` if and only if `value1`/`value2` are + /// index-typed. + static FailureOr + computeConstantDelta(Value value1, Value value2, + std::optional dim1 = std::nullopt, + std::optional dim2 = std::nullopt); + /// Compute whether the given values/dimensions are equal. Return "failure" if /// equality could not be determined. 
/// diff --git a/mlir/lib/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.cpp index d47c8eb8ccb42..e0c3abe7a0f71 100644 --- a/mlir/lib/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.cpp @@ -103,8 +103,8 @@ void mlir::affine::registerValueBoundsOpInterfaceExternalModels( }); } -FailureOr mlir::affine::fullyComposeAndCheckIfEqual(Value value1, - Value value2) { +FailureOr +mlir::affine::fullyComposeAndComputeConstantDelta(Value value1, Value value2) { assert(value1.getType().isIndex() && "expected index type"); assert(value2.getType().isIndex() && "expected index type"); @@ -123,9 +123,6 @@ FailureOr mlir::affine::fullyComposeAndCheckIfEqual(Value value1, ValueDimList valueDims; for (Value v : mapOperands) valueDims.push_back({v, std::nullopt}); - FailureOr bound = ValueBoundsConstraintSet::computeConstantBound( + return ValueBoundsConstraintSet::computeConstantBound( presburger::BoundType::EQ, map, valueDims); - if (failed(bound)) - return failure(); - return *bound == 0; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp index 221bec713b38a..cbb2c507de69f 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -173,16 +173,16 @@ void mlir::linalg::hoistRedundantVectorTransfers(func::FuncOp func) { if (auto transferWriteUse = dyn_cast(use.getOwner())) { if (!vector::isDisjointTransferSet( - cast(transferWrite.getOperation()), - cast( - transferWriteUse.getOperation()))) + cast(*transferWrite), + cast(*transferWriteUse), + /*testDynamicValueUsingBounds=*/true)) return WalkResult::advance(); } else if (auto transferReadUse = dyn_cast(use.getOwner())) { if (!vector::isDisjointTransferSet( - cast(transferWrite.getOperation()), - cast( - transferReadUse.getOperation()))) + cast(*transferWrite), + cast(*transferReadUse), + 
/*testDynamicValueUsingBounds=*/true)) return WalkResult::advance(); } else { // Unknown use, we cannot prove that it doesn't alias with the diff --git a/mlir/lib/Dialect/Vector/IR/CMakeLists.txt b/mlir/lib/Dialect/Vector/IR/CMakeLists.txt index 9ec919423b342..70f3fa8c297d4 100644 --- a/mlir/lib/Dialect/Vector/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/IR/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_dialect_library(MLIRVectorDialect MLIRVectorAttributesIncGen LINK_LIBS PUBLIC + MLIRAffineDialect MLIRArithDialect MLIRControlFlowInterfaces MLIRDataLayoutInterfaces @@ -22,5 +23,6 @@ add_mlir_dialect_library(MLIRVectorDialect MLIRMemRefDialect MLIRSideEffectInterfaces MLIRTensorDialect + MLIRValueBoundsOpInterface MLIRVectorInterfaces ) diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 044b6cc07d3d6..68a5cf209f2fb 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" @@ -30,6 +31,7 @@ #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeUtilities.h" +#include "mlir/Interfaces/ValueBoundsOpInterface.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -168,39 +170,76 @@ bool mlir::vector::checkSameValueWAW(vector::TransferWriteOp write, } bool mlir::vector::isDisjointTransferIndices( - VectorTransferOpInterface transferA, VectorTransferOpInterface transferB) { + VectorTransferOpInterface transferA, VectorTransferOpInterface transferB, + bool testDynamicValueUsingBounds) { // For simplicity only look at transfer of same type. 
if (transferA.getVectorType() != transferB.getVectorType()) return false; unsigned rankOffset = transferA.getLeadingShapedRank(); for (unsigned i = 0, e = transferA.indices().size(); i < e; i++) { - auto indexA = getConstantIntValue(transferA.indices()[i]); - auto indexB = getConstantIntValue(transferB.indices()[i]); - // If any of the indices are dynamic we cannot prove anything. - if (!indexA.has_value() || !indexB.has_value()) - continue; + Value indexA = transferA.indices()[i]; + Value indexB = transferB.indices()[i]; + std::optional cstIndexA = getConstantIntValue(indexA); + std::optional cstIndexB = getConstantIntValue(indexB); if (i < rankOffset) { // For leading dimensions, if we can prove that index are different we // know we are accessing disjoint slices. - if (*indexA != *indexB) - return true; + if (cstIndexA.has_value() && cstIndexB.has_value()) { + if (*cstIndexA != *cstIndexB) + return true; + continue; + } + if (testDynamicValueUsingBounds) { + // First try to see if we can fully compose and simplify the affine + // expression as a fast track. + FailureOr delta = + affine::fullyComposeAndComputeConstantDelta(indexA, indexB); + if (succeeded(delta) && *delta != 0) + return true; + + FailureOr testEqual = + ValueBoundsConstraintSet::areEqual(indexA, indexB); + if (succeeded(testEqual) && !testEqual.value()) + return true; + } } else { // For this dimension, we slice a part of the memref we need to make sure // the intervals accessed don't overlap. 
- int64_t distance = std::abs(*indexA - *indexB); - if (distance >= transferA.getVectorType().getDimSize(i - rankOffset)) - return true; + int64_t vectorDim = transferA.getVectorType().getDimSize(i - rankOffset); + if (cstIndexA.has_value() && cstIndexB.has_value()) { + int64_t distance = std::abs(*cstIndexA - *cstIndexB); + if (distance >= vectorDim) + return true; + continue; + } + if (testDynamicValueUsingBounds) { + // First try to see if we can fully compose and simplify the affine + // expression as a fast track. + FailureOr delta = + affine::fullyComposeAndComputeConstantDelta(indexA, indexB); + if (succeeded(delta) && std::abs(*delta) >= vectorDim) + return true; + + FailureOr computeDelta = + ValueBoundsConstraintSet::computeConstantDelta(indexA, indexB); + if (succeeded(computeDelta)) { + if (std::abs(computeDelta.value()) >= vectorDim) + return true; + } + } } } return false; } bool mlir::vector::isDisjointTransferSet(VectorTransferOpInterface transferA, - VectorTransferOpInterface transferB) { + VectorTransferOpInterface transferB, + bool testDynamicValueUsingBounds) { if (transferA.source() != transferB.source()) return false; - return isDisjointTransferIndices(transferA, transferB); + return isDisjointTransferIndices(transferA, transferB, + testDynamicValueUsingBounds); } // Helper to iterate over n-D vector slice elements. Calculate the next diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp index 603b88f11c8e0..a5f1b28152b9b 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp @@ -142,7 +142,8 @@ void TransferOptimization::deadStoreOp(vector::TransferWriteOp write) { // Don't need to consider disjoint accesses. 
if (vector::isDisjointTransferSet( cast(write.getOperation()), - cast(transferOp.getOperation()))) + cast(transferOp.getOperation()), + /*testDynamicValueUsingBounds=*/true)) continue; } blockingAccesses.push_back(user); @@ -217,7 +218,8 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) { // the write. if (vector::isDisjointTransferSet( cast(write.getOperation()), - cast(read.getOperation()))) + cast(read.getOperation()), + /*testDynamicValueUsingBounds=*/true)) continue; if (write.getSource() == read.getSource() && dominators.dominates(write, read) && checkSameValueRAW(write, read)) { diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp index c00ee0315a963..ff941115219f6 100644 --- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp +++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp @@ -484,25 +484,32 @@ FailureOr ValueBoundsConstraintSet::computeConstantBound( return failure(); } -FailureOr -ValueBoundsConstraintSet::areEqual(Value value1, Value value2, - std::optional dim1, - std::optional dim2) { +FailureOr +ValueBoundsConstraintSet::computeConstantDelta(Value value1, Value value2, + std::optional dim1, + std::optional dim2) { #ifndef NDEBUG assertValidValueDim(value1, dim1); assertValidValueDim(value2, dim2); #endif // NDEBUG - // Subtract the two values/dimensions from each other. If the result is 0, - // both are equal. Builder b(value1.getContext()); AffineMap map = AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, b.getAffineDimExpr(0) - b.getAffineDimExpr(1)); - FailureOr bound = computeConstantBound( - presburger::BoundType::EQ, map, {{value1, dim1}, {value2, dim2}}); - if (failed(bound)) + return computeConstantBound(presburger::BoundType::EQ, map, + {{value1, dim1}, {value2, dim2}}); +} + +FailureOr +ValueBoundsConstraintSet::areEqual(Value value1, Value value2, + std::optional dim1, + std::optional dim2) { + // Subtract the two values/dimensions from each other. 
If the result is 0, + // both are equal. + FailureOr delta = computeConstantDelta(value1, value2, dim1, dim2); + if (failed(delta)) return failure(); - return *bound == 0; + return *delta == 0; } ValueBoundsConstraintSet::BoundBuilder & diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir index 7d0c3648c344b..11bf4b58b95c8 100644 --- a/mlir/test/Dialect/Linalg/hoisting.mlir +++ b/mlir/test/Dialect/Linalg/hoisting.mlir @@ -872,3 +872,135 @@ transform.sequence failures(propagate) { transform.structured.hoist_redundant_vector_transfers %0 : (!transform.any_op) -> !transform.any_op } + +// ----- + +// Test that we can hoist out 1-D read-write pairs whose indices are dynamic values. + +// CHECK: #[[$MAP1:.+]] = affine_map<()[s0] -> (s0 + 1)> +// CHECK: #[[$MAP4:.+]] = affine_map<()[s0] -> (s0 + 4)> + +// CHECK-LABEL: func.func @hoist_vector_transfer_pairs_disjoint_dynamic +// CHECK-SAME: (%[[BUFFER:.+]]: memref, %{{.+}}: index, %{{.+}}: index, %{{.+}}: index, %[[I0:.+]]: index) + +// CHECK: %[[PLUS1:.+]] = affine.apply #[[$MAP1]]()[%[[I0]]] +// CHECK: %[[PLUS4:.+]] = affine.apply #[[$MAP4]]()[%[[I0]]] +// CHECK: %2 = vector.transfer_read %[[BUFFER]][%[[I0]], %[[I0]]] +// CHECK: %3 = vector.transfer_read %[[BUFFER]][%[[PLUS1]], %[[I0]]] +// CHECK: %4 = vector.transfer_read %[[BUFFER]][%[[PLUS1]], %[[PLUS4]]] +// CHECK-COUNT-2: scf.for %{{.+}} = {{.+}} -> (vector<4xf32>, vector<4xf32>, vector<4xf32>) +// CHECK-COUNT-3: "some_use" +// CHECK-COUNT-2: scf.yield {{.+}} : vector<4xf32>, vector<4xf32>, vector<4xf32> +// CHECK: vector.transfer_write %{{.+}}, %[[BUFFER]][%[[PLUS1]], %[[PLUS4]]] +// CHECK: vector.transfer_write %{{.+}}, %[[BUFFER]][%[[PLUS1]], %[[I0]]] +// CHECK: vector.transfer_write %{{.+}}, %[[BUFFER]][%[[I0]], %[[I0]]] + +func.func @hoist_vector_transfer_pairs_disjoint_dynamic( + %buffer: memref, %lb : index, %ub : index, %step: index, %i0 : index) { + %cst = arith.constant 0.0 : f32 + %i1 = affine.apply affine_map<(d0) 
-> (d0 + 1)>(%i0) + %i2 = affine.apply affine_map<(d0) -> (d0 + 4)>(%i0) + + scf.for %i = %lb to %ub step %step { + scf.for %j = %lb to %ub step %step { + %r0 = vector.transfer_read %buffer[%i0, %i0], %cst: memref, vector<4xf32> + // Disjoint leading dim + %r1 = vector.transfer_read %buffer[%i1, %i0], %cst: memref, vector<4xf32> + // Non-overlap trailing dim + %r2 = vector.transfer_read %buffer[%i1, %i2], %cst: memref, vector<4xf32> + %u0 = "some_use"(%r0) : (vector<4xf32>) -> vector<4xf32> + %u1 = "some_use"(%r1) : (vector<4xf32>) -> vector<4xf32> + %u2 = "some_use"(%r2) : (vector<4xf32>) -> vector<4xf32> + vector.transfer_write %u0, %buffer[%i0, %i0] : vector<4xf32>, memref + vector.transfer_write %u1, %buffer[%i1, %i0] : vector<4xf32>, memref + vector.transfer_write %u2, %buffer[%i1, %i2] : vector<4xf32>, memref + } + } + return +} + +transform.sequence failures(propagate) { +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["func.func"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + transform.structured.hoist_redundant_vector_transfers %0 + : (!transform.any_op) -> !transform.any_op +} + +// ----- + +// Test that we cannot hoist out read-write pairs whose indices are overlapping. 
+ +// CHECK-LABEL: func.func @hoist_vector_transfer_pairs_overlapping_dynamic +// CHECK-COUNT-2: scf.for +// CHECK-COUNT-2: vector.transfer_read +// CHECK-COUNT-2: vector.transfer_write + +func.func @hoist_vector_transfer_pairs_overlapping_dynamic( + %buffer: memref, %lb : index, %ub : index, %step: index, %i0 : index) { + %cst = arith.constant 0.0 : f32 + %i1 = affine.apply affine_map<(d0) -> (d0 + 3)>(%i0) + + scf.for %i = %lb to %ub step %step { + scf.for %j = %lb to %ub step %step { + %r0 = vector.transfer_read %buffer[%i0, %i0], %cst: memref, vector<4xf32> + // Overlapping range with the above + %r1 = vector.transfer_read %buffer[%i0, %i1], %cst: memref, vector<4xf32> + %u0 = "some_use"(%r0) : (vector<4xf32>) -> vector<4xf32> + %u1 = "some_use"(%r1) : (vector<4xf32>) -> vector<4xf32> + vector.transfer_write %u0, %buffer[%i0, %i0] : vector<4xf32>, memref + vector.transfer_write %u1, %buffer[%i0, %i1] : vector<4xf32>, memref + } + } + return +} + +transform.sequence failures(propagate) { +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["func.func"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + transform.structured.hoist_redundant_vector_transfers %0 + : (!transform.any_op) -> !transform.any_op +} + +// ----- + +// Test that we can hoist out 2-D read-write pairs whose indices are dynamic values. 
+ +// CHECK-LABEL: func.func @hoist_vector_transfer_pairs_disjoint_dynamic +// CHECK-COUNT-3: vector.transfer_read +// CHECK-COUNT-2: %{{.+}}:3 = scf.for {{.+}} -> (vector<16x8xf32>, vector<16x8xf32>, vector<16x8xf32>) +// CHECK-COUNT-2: scf.yield {{.+}} : vector<16x8xf32>, vector<16x8xf32>, vector<16x8xf32> +// CHECK-COUNT-3: vector.transfer_write +// CHECK: return + +func.func @hoist_vector_transfer_pairs_disjoint_dynamic( + %buffer: memref, %lb : index, %ub : index, %step: index, %i0 : index, %i1 : index) { + %cst = arith.constant 0.0 : f32 + %i2 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 16)>(%i1) + %i3 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 16 + 8)>(%i1) + %i4 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 16 + 16)>(%i1) + + scf.for %i = %lb to %ub step %step { + scf.for %j = %lb to %ub step %step { + %r0 = vector.transfer_read %buffer[%i0, %i2], %cst: memref, vector<16x8xf32> + %r1 = vector.transfer_read %buffer[%i0, %i3], %cst: memref, vector<16x8xf32> + %r2 = vector.transfer_read %buffer[%i0, %i4], %cst: memref, vector<16x8xf32> + %u0 = "some_use"(%r0) : (vector<16x8xf32>) -> vector<16x8xf32> + %u1 = "some_use"(%r1) : (vector<16x8xf32>) -> vector<16x8xf32> + %u2 = "some_use"(%r2) : (vector<16x8xf32>) -> vector<16x8xf32> + vector.transfer_write %u2, %buffer[%i0, %i4] : vector<16x8xf32>, memref + vector.transfer_write %u1, %buffer[%i0, %i3] : vector<16x8xf32>, memref + vector.transfer_write %u0, %buffer[%i0, %i2] : vector<16x8xf32>, memref + } + } + return +} + +transform.sequence failures(propagate) { +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["func.func"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + transform.structured.hoist_redundant_vector_transfers %0 + : (!transform.any_op) -> !transform.any_op +} diff --git a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir index f43367ab4aeba..13957af014b89 100644 --- 
a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir +++ b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir @@ -256,3 +256,107 @@ func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) { } return } + +// CHECK-LABEL: func @forward_dead_store_dynamic_same_index +// CHECK-NOT: vector.transfer_write +// CHECK-NOT: vector.transfer_read +// CHECK: scf.for +// CHECK: } +// CHECK: vector.transfer_write +// CHECK: return +func.func @forward_dead_store_dynamic_same_index( + %buffer : memref, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i : index) { + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + %c0 = arith.constant 0 : index + %cf0 = arith.constant 0.0 : f32 + vector.transfer_write %v0, %buffer[%i, %i] {in_bounds = [true]} : vector<4xf32>, memref + // The following transfer op reads/writes to the same address so that we can forward. + %0 = vector.transfer_read %buffer[%i, %i], %cf0 {in_bounds = [true]} : memref, vector<4xf32> + %x = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) { + %1 = arith.addf %acc, %acc : vector<4xf32> + scf.yield %1 : vector<4xf32> + } + vector.transfer_write %x, %buffer[%i, %i] {in_bounds = [true]} : vector<4xf32>, memref + return +} + +// CHECK-LABEL: func @dont_forward_dead_store_dynamic_overlap +// CHECK-COUNT-2: vector.transfer_write +// CHECK: vector.transfer_read +// CHECK: scf.for +// CHECK: } +// CHECK: vector.transfer_write +// CHECK: return +func.func @dont_forward_dead_store_dynamic_overlap( + %buffer : memref, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i0 : index) { + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + %c0 = arith.constant 0 : index + %cf0 = arith.constant 0.0 : f32 + %i1 = affine.apply affine_map<(d0) -> (d0 + 3)>(%i0) + vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + // The following transfer op writes to an overlapping range so we cannot forward. 
+ vector.transfer_write %v0, %buffer[%i0, %i1] {in_bounds = [true]} : vector<4xf32>, memref + %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref, vector<4xf32> + %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) { + %1 = arith.addf %acc, %acc : vector<4xf32> + scf.yield %1 : vector<4xf32> + } + vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + return +} + +// CHECK-LABEL: func @forward_dead_store_dynamic_non_overlap_leading_dim +// CHECK: vector.transfer_write +// CHECK-NOT: vector.transfer_write +// CHECK-NOT: vector.transfer_read +// CHECK: scf.for +// CHECK: } +// CHECK: vector.transfer_write +// CHECK: return +func.func @forward_dead_store_dynamic_non_overlap_leading_dim( + %buffer : memref, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i0 : index) { + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + %c0 = arith.constant 0 : index + %cf0 = arith.constant 0.0 : f32 + %i1 = affine.apply affine_map<(d0) -> (d0 + 1)>(%i0) + vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + // The following transfer op writes to an non-overlapping range so we can forward. 
+ vector.transfer_write %v0, %buffer[%i1, %i0] {in_bounds = [true]} : vector<4xf32>, memref + %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref, vector<4xf32> + %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) { + %1 = arith.addf %acc, %acc : vector<4xf32> + scf.yield %1 : vector<4xf32> + } + vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + return +} + +// CHECK-LABEL: func @forward_dead_store_dynamic_non_overlap_trailing_dim +// CHECK: vector.transfer_write +// CHECK-NOT: vector.transfer_write +// CHECK-NOT: vector.transfer_read +// CHECK: scf.for +// CHECK: } +// CHECK: vector.transfer_write +// CHECK: return +func.func @forward_dead_store_dynamic_non_overlap_trailing_dim( + %buffer : memref, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i0 : index) { + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + %c0 = arith.constant 0 : index + %cf0 = arith.constant 0.0 : f32 + %i1 = affine.apply affine_map<(d0) -> (d0 + 4)>(%i0) + vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + // The following transfer op writes to an non-overlapping range so we can forward. 
+ vector.transfer_write %v0, %buffer[%i0, %i1] {in_bounds = [true]} : vector<4xf32>, memref + %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref, vector<4xf32> + %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) { + %1 = arith.addf %acc, %acc : vector<4xf32> + scf.yield %1 : vector<4xf32> + } + vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + return +} diff --git a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp index 6e3c3dff759a2..2f1631cbdb02e 100644 --- a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp +++ b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp @@ -187,20 +187,26 @@ static LogicalResult testEquality(func::FuncOp funcOp) { op->emitOpError("invalid op"); return WalkResult::skip(); } - FailureOr equal = failure(); if (op->hasAttr("compose")) { - equal = affine::fullyComposeAndCheckIfEqual(op->getOperand(0), - op->getOperand(1)); - } else { - equal = ValueBoundsConstraintSet::areEqual(op->getOperand(0), - op->getOperand(1)); - } - if (failed(equal)) { - op->emitError("could not determine equality"); - } else if (*equal) { - op->emitRemark("equal"); + FailureOr equal = affine::fullyComposeAndComputeConstantDelta( + op->getOperand(0), op->getOperand(1)); + if (failed(equal)) { + op->emitError("could not determine equality"); + } else if (*equal == 0) { + op->emitRemark("equal"); + } else { + op->emitRemark("different"); + } } else { - op->emitRemark("different"); + FailureOr equal = ValueBoundsConstraintSet::areEqual( + op->getOperand(0), op->getOperand(1)); + if (failed(equal)) { + op->emitError("could not determine equality"); + } else if (*equal) { + op->emitRemark("equal"); + } else { + op->emitRemark("different"); + } } } return WalkResult::advance(); diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 
de13e03807e82..63f9cdafce88b 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4422,6 +4422,7 @@ cc_library( ]), includes = ["include"], deps = [ + ":AffineDialect", ":ArithDialect", ":ArithUtils", ":ControlFlowInterfaces", @@ -4436,6 +4437,7 @@ cc_library( ":SideEffectInterfaces", ":Support", ":TensorDialect", + ":ValueBoundsOpInterface", ":VectorAttributesIncGen", ":VectorDialectIncGen", ":VectorInterfaces", From ab737a86993bc7bf92cbb9d51f47f8825a717333 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Mon, 16 Oct 2023 08:45:48 +0900 Subject: [PATCH 178/720] [mlir][Interfaces] `LoopLikeOpInterface`: Add helper to get yielded values (#67305) Add a new interface method that returns the yielded values. Also add a verifier that checks the number of inits/iter_args/yielded values. Most of the checked invariants (but not all of them) are already covered by the `RegionBranchOpInterface`, but the `LoopLikeOpInterface` now provides (additional) error messages that are easier to read. 
--- .../include/flang/Optimizer/Dialect/FIROps.td | 4 ++ flang/lib/Optimizer/Dialect/FIROps.cpp | 12 ++++++ .../mlir/Dialect/Affine/IR/AffineOps.td | 3 +- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td | 11 +++++- .../mlir/Interfaces/LoopLikeInterface.h | 5 +++ .../mlir/Interfaces/LoopLikeInterface.td | 30 +++++++++++++++ mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 4 ++ .../Linalg/Transforms/HoistPadding.cpp | 5 +-- mlir/lib/Dialect/SCF/IR/SCF.cpp | 27 ++++++++------ .../BufferizableOpInterfaceImpl.cpp | 3 +- .../SCF/Transforms/LoopCanonicalization.cpp | 5 +-- mlir/lib/Interfaces/LoopLikeInterface.cpp | 37 +++++++++++++++++++ mlir/test/Dialect/SCF/invalid.mlir | 30 ++++++++++++++- mlir/test/lib/Dialect/SCF/TestSCFUtils.cpp | 5 +-- 14 files changed, 153 insertions(+), 28 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 2b877379f1384..80d1635e50da2 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -2119,6 +2119,8 @@ def fir_DoLoopOp : region_Op<"do_loop", mlir::Operation::operand_range getIterOperands() { return getOperands().drop_front(getNumControlOperands()); } + mlir::OperandRange getInits() { return getIterOperands(); } + mlir::ValueRange getYieldedValues(); void setLowerBound(mlir::Value bound) { (*this)->setOperand(0, bound); } void setUpperBound(mlir::Value bound) { (*this)->setOperand(1, bound); } @@ -2270,6 +2272,8 @@ def fir_IterWhileOp : region_Op<"iterate_while", mlir::Operation::operand_range getIterOperands() { return getOperands().drop_front(getNumControlOperands()); } + mlir::OperandRange getInits() { return getIterOperands(); } + mlir::ValueRange getYieldedValues(); void setLowerBound(mlir::Value bound) { (*this)->setOperand(0, bound); } void setUpperBound(mlir::Value bound) { (*this)->setOperand(1, bound); } diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 
c35147f6d07b8..38311832f20dd 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -1933,6 +1933,12 @@ mlir::Value fir::IterWhileOp::blockArgToSourceOp(unsigned blockArgNum) { return {}; } +mlir::ValueRange fir::IterWhileOp::getYieldedValues() { + auto *term = getRegion().front().getTerminator(); + return getFinalValue() ? term->getOperands().drop_front() + : term->getOperands(); +} + //===----------------------------------------------------------------------===// // LenParamIndexOp //===----------------------------------------------------------------------===// @@ -2238,6 +2244,12 @@ mlir::Value fir::DoLoopOp::blockArgToSourceOp(unsigned blockArgNum) { return {}; } +mlir::ValueRange fir::DoLoopOp::getYieldedValues() { + auto *term = getRegion().front().getTerminator(); + return getFinalValue() ? term->getOperands().drop_front() + : term->getOperands(); +} + //===----------------------------------------------------------------------===// // DTEntryOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td index f90a7b23ec12e..36fdf390a7617 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td @@ -121,7 +121,8 @@ def AffineForOp : Affine_Op<"for", ImplicitAffineTerminator, ConditionallySpeculatable, RecursiveMemoryEffects, DeclareOpInterfaceMethods, + "getSingleUpperBound", "getYieldedValues", + "replaceWithAdditionalYields"]>, DeclareOpInterfaceMethods]> { let summary = "for operation"; diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td index e1a604a88715f..f2ea7dd868a37 100644 --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -122,8 +122,8 @@ def ExecuteRegionOp : SCF_Op<"execute_region", [ def ForOp : SCF_Op<"for", 
[AutomaticAllocationScope, DeclareOpInterfaceMethods, + "getSingleStep", "getSingleUpperBound", "getYieldedValues", + "promoteIfSingleIteration", "replaceWithAdditionalYields"]>, AllTypesMatch<["lowerBound", "upperBound", "step"]>, ConditionallySpeculatable, DeclareOpInterfaceMethods; Value getInductionVar() { return getBody()->getArgument(0); } + Block::BlockArgListType getRegionIterArgs() { return getBody()->getArguments().drop_front(getNumInductionVars()); } + /// Return the `index`-th region iteration argument. BlockArgument getRegionIterArg(unsigned index) { assert(index < getNumRegionIterArgs() && @@ -1086,6 +1088,11 @@ def WhileOp : SCF_Op<"while", ConditionOp getConditionOp(); YieldOp getYieldOp(); + + /// Return the values that are yielded from the "after" region (by the + /// scf.yield op). + ValueRange getYieldedValues(); + Block::BlockArgListType getBeforeArguments(); Block::BlockArgListType getAfterArguments(); Block *getBeforeBody() { return &getBefore().front(); } diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.h b/mlir/include/mlir/Interfaces/LoopLikeInterface.h index 0eebb984e5897..7c7d378d0590a 100644 --- a/mlir/include/mlir/Interfaces/LoopLikeInterface.h +++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.h @@ -24,6 +24,11 @@ class RewriterBase; /// arguments in `newBbArgs`. using NewYieldValuesFn = std::function( OpBuilder &b, Location loc, ArrayRef newBbArgs)>; + +namespace detail { +/// Verify invariants of the LoopLikeOpInterface. +LogicalResult verifyLoopLikeOpInterface(Operation *op); +} // namespace detail } // namespace mlir /// Include the generated interface declarations. 
diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.td b/mlir/include/mlir/Interfaces/LoopLikeInterface.td index ded0a29292ff6..4d2a66dd3143d 100644 --- a/mlir/include/mlir/Interfaces/LoopLikeInterface.td +++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.td @@ -20,6 +20,19 @@ def LoopLikeOpInterface : OpInterface<"LoopLikeOpInterface"> { Contains helper functions to query properties and perform transformations of a loop. Operations that implement this interface will be considered by loop-invariant code motion. + + Loop-carried variables can be exposed through this interface. There are + 3 components to a loop-carried variable. + - The "region iter_arg" is the block argument of the entry block that + represents the loop-carried variable in each iteration. + - The "init value" is an operand of the loop op that serves as the initial + region iter_arg value for the first iteration (if any). + - The "yielded" value is the value that is forwarded from one iteration to + serve as the region iter_arg of the next iteration. + + If one of the respective interface methods is implemented, so must the other + two. The interface verifier ensures that the number of types of the region + iter_args, init values and yielded values match. }]; let cppNamespace = "::mlir"; @@ -141,6 +154,17 @@ def LoopLikeOpInterface : OpInterface<"LoopLikeOpInterface"> { return ::mlir::Block::BlockArgListType(); }] >, + InterfaceMethod<[{ + Return the values that are yielded to the next iteration. + }], + /*retTy=*/"::mlir::ValueRange", + /*methodName=*/"getYieldedValues", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return ::mlir::ValueRange(); + }] + >, InterfaceMethod<[{ Append the specified additional "init" operands: replace this loop with a new loop that has the additional init operands. 
The loop body of @@ -192,6 +216,12 @@ def LoopLikeOpInterface : OpInterface<"LoopLikeOpInterface"> { }); } }]; + + let verifyWithRegions = 1; + + let verify = [{ + return detail::verifyLoopLikeOpInterface($_op); + }]; } #endif // MLIR_INTERFACES_LOOPLIKEINTERFACE diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index 113f4cfc31c10..f2b3171c1ab83 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -2215,6 +2215,10 @@ unsigned AffineForOp::getNumIterOperands() { return getNumOperands() - lbMap.getNumInputs() - ubMap.getNumInputs(); } +ValueRange AffineForOp::getYieldedValues() { + return cast(getBody()->getTerminator()).getOperands(); +} + void AffineForOp::print(OpAsmPrinter &p) { p << ' '; p.printRegionArgument(getBody()->getArgument(0), /*argAttrs=*/{}, diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp index 72bd2b409f5d5..8fef99bb37509 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp @@ -811,8 +811,7 @@ padThroughLoopIterArg(RewriterBase &rewriter, Value paddedValueBeforeHoisting, rewriter.setInsertionPointAfter(hoistedPackedTensor.getDefiningOp()); unsigned iterArgNumber = forOp.getResultForOpOperand(*pUse).getResultNumber(); - auto yieldOp = cast(forOp.getBody(0)->getTerminator()); - auto yieldingExtractSliceOp = yieldOp->getOperand(iterArgNumber) + auto yieldingExtractSliceOp = forOp.getYieldedValues()[iterArgNumber] .getDefiningOp(); if (!yieldingExtractSliceOp) return tensor::ExtractSliceOp(); @@ -826,7 +825,7 @@ padThroughLoopIterArg(RewriterBase &rewriter, Value paddedValueBeforeHoisting, SmallVector initArgs = forOp.getInitArgs(); initArgs[iterArgNumber] = hoistedPackedTensor; - SmallVector yieldOperands = yieldOp.getOperands(); + SmallVector yieldOperands = llvm::to_vector(forOp.getYieldedValues()); 
yieldOperands[iterArgNumber] = yieldingExtractSliceOp.getSource(); int64_t numOriginalForOpResults = initArgs.size(); diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 8d8481421e18d..508227d6e7ce4 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -400,7 +400,7 @@ LogicalResult ForOp::promoteIfSingleIteration(RewriterBase &rewriter) { // Replace all results with the yielded values. auto yieldOp = cast(getBody()->getTerminator()); - rewriter.replaceAllUsesWith(getResults(), yieldOp.getOperands()); + rewriter.replaceAllUsesWith(getResults(), getYieldedValues()); // Replace block arguments with lower bound (replacement for IV) and // iter_args. @@ -772,27 +772,26 @@ struct ForOpIterArgsFolder : public OpRewritePattern { LogicalResult matchAndRewrite(scf::ForOp forOp, PatternRewriter &rewriter) const final { bool canonicalize = false; - Block &block = forOp.getRegion().front(); - auto yieldOp = cast(block.getTerminator()); // An internal flat vector of block transfer // arguments `newBlockTransferArgs` keeps the 1-1 mapping of original to // transformed block argument mappings. This plays the role of a // IRMapping for the particular use case of calling into // `inlineBlockBefore`. 
+ int64_t numResults = forOp.getNumResults(); SmallVector keepMask; - keepMask.reserve(yieldOp.getNumOperands()); + keepMask.reserve(numResults); SmallVector newBlockTransferArgs, newIterArgs, newYieldValues, newResultValues; - newBlockTransferArgs.reserve(1 + forOp.getInitArgs().size()); + newBlockTransferArgs.reserve(1 + numResults); newBlockTransferArgs.push_back(Value()); // iv placeholder with null value newIterArgs.reserve(forOp.getInitArgs().size()); - newYieldValues.reserve(yieldOp.getNumOperands()); - newResultValues.reserve(forOp.getNumResults()); + newYieldValues.reserve(numResults); + newResultValues.reserve(numResults); for (auto it : llvm::zip(forOp.getInitArgs(), // iter from outside forOp.getRegionIterArgs(), // iter inside region forOp.getResults(), // op results - yieldOp.getOperands() // iter yield + forOp.getYieldedValues() // iter yield )) { // Forwarded is `true` when: // 1) The region `iter` argument is yielded. @@ -946,12 +945,10 @@ struct SimplifyTrivialLoops : public OpRewritePattern { return failure(); // If the loop is empty, iterates at least once, and only returns values // defined outside of the loop, remove it and replace it with yield values. - auto yieldOp = cast(block.getTerminator()); - auto yieldOperands = yieldOp.getOperands(); - if (llvm::any_of(yieldOperands, + if (llvm::any_of(op.getYieldedValues(), [&](Value v) { return !op.isDefinedOutsideOfLoop(v); })) return failure(); - rewriter.replaceOp(op, yieldOperands); + rewriter.replaceOp(op, op.getYieldedValues()); return success(); } }; @@ -1224,6 +1221,10 @@ std::optional ForOp::getConstantStep() { return {}; } +ValueRange ForOp::getYieldedValues() { + return cast(getBody()->getTerminator()).getResults(); +} + Speculation::Speculatability ForOp::getSpeculatability() { // `scf.for (I = Start; I < End; I += 1)` terminates for all values of Start // and End. 
@@ -3205,6 +3206,8 @@ YieldOp WhileOp::getYieldOp() { return cast(getAfterBody()->getTerminator()); } +ValueRange WhileOp::getYieldedValues() { return getYieldOp().getResults(); } + Block::BlockArgListType WhileOp::getBeforeArguments() { return getBeforeBody()->getArguments(); } diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp index 0d02a590f2969..455b7d8bcaff0 100644 --- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp @@ -627,9 +627,8 @@ struct ForOpInterface auto forOp = cast(op); OpOperand &forOperand = forOp.getOpOperandForResult(opResult); auto bbArg = forOp.getRegionIterArgForOpOperand(forOperand); - auto yieldOp = cast(forOp.getBody()->getTerminator()); bool equivalentYield = state.areEquivalentBufferizedValues( - bbArg, yieldOp->getOperand(opResult.getResultNumber())); + bbArg, forOp.getYieldedValues()[opResult.getResultNumber()]); return equivalentYield ? BufferRelation::Equivalent : BufferRelation::Unknown; } diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp index 0cd19fbefa8ef..43e79d309c667 100644 --- a/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp @@ -36,10 +36,9 @@ using namespace mlir::scf; /// type of the corresponding basic block argument of the loop. /// Note: This function handles only simple cases. Expand as needed. 
static bool isShapePreserving(ForOp forOp, int64_t arg) { - auto yieldOp = cast(forOp.getBody()->getTerminator()); - assert(arg < static_cast(yieldOp.getResults().size()) && + assert(arg < static_cast(forOp.getNumResults()) && "arg is out of bounds"); - Value value = yieldOp.getResults()[arg]; + Value value = forOp.getYieldedValues()[arg]; while (value) { if (value == forOp.getRegionIterArgs()[arg]) return true; diff --git a/mlir/lib/Interfaces/LoopLikeInterface.cpp b/mlir/lib/Interfaces/LoopLikeInterface.cpp index 781a21bb3ecd3..15a816f4e4488 100644 --- a/mlir/lib/Interfaces/LoopLikeInterface.cpp +++ b/mlir/lib/Interfaces/LoopLikeInterface.cpp @@ -52,3 +52,40 @@ bool LoopLikeOpInterface::blockIsInLoop(Block *block) { } return false; } + +LogicalResult detail::verifyLoopLikeOpInterface(Operation *op) { + // Note: These invariants are also verified by the RegionBranchOpInterface, + // but the LoopLikeOpInterface provides better error messages. + auto loopLikeOp = cast(op); + + // Verify number of inits/iter_args/yielded values. + if (loopLikeOp.getInits().size() != loopLikeOp.getRegionIterArgs().size()) + return op->emitOpError("different number of inits and region iter_args: ") + << loopLikeOp.getInits().size() + << " != " << loopLikeOp.getRegionIterArgs().size(); + if (loopLikeOp.getRegionIterArgs().size() != + loopLikeOp.getYieldedValues().size()) + return op->emitOpError( + "different number of region iter_args and yielded values: ") + << loopLikeOp.getRegionIterArgs().size() + << " != " << loopLikeOp.getYieldedValues().size(); + + // Verify types of inits/iter_args/yielded values. 
+ int64_t i = 0; + for (const auto it : + llvm::zip_equal(loopLikeOp.getInits(), loopLikeOp.getRegionIterArgs(), + loopLikeOp.getYieldedValues())) { + if (std::get<0>(it).getType() != std::get<1>(it).getType()) + op->emitOpError(std::to_string(i)) + << "-th init and " << i << "-th region iter_arg have different type: " + << std::get<0>(it).getType() << " != " << std::get<1>(it).getType(); + if (std::get<1>(it).getType() != std::get<2>(it).getType()) + op->emitOpError(std::to_string(i)) + << "-th region iter_arg and " << i + << "-th yielded value have different type: " + << std::get<1>(it).getType() << " != " << std::get<2>(it).getType(); + ++i; + } + + return success(); +} diff --git a/mlir/test/Dialect/SCF/invalid.mlir b/mlir/test/Dialect/SCF/invalid.mlir index f6044ad108292..1b2c3f563195c 100644 --- a/mlir/test/Dialect/SCF/invalid.mlir +++ b/mlir/test/Dialect/SCF/invalid.mlir @@ -96,6 +96,32 @@ func.func @not_enough_loop_results(%arg0: index, %init: f32) { // ----- +func.func @too_many_iter_args(%arg0: index, %init: f32) { + // expected-error @below{{different number of inits and region iter_args: 1 != 2}} + %x = "scf.for"(%arg0, %arg0, %arg0, %init) ( + { + ^bb0(%i0 : index, %iter: f32, %iter2: f32): + scf.yield %iter, %iter : f32, f32 + } + ) : (index, index, index, f32) -> (f32) + return +} + +// ----- + +func.func @too_few_yielded_values(%arg0: index, %init: f32) { + // expected-error @below{{different number of region iter_args and yielded values: 2 != 1}} + %x, %x2 = "scf.for"(%arg0, %arg0, %arg0, %init, %init) ( + { + ^bb0(%i0 : index, %iter: f32, %iter2: f32): + scf.yield %iter : f32 + } + ) : (index, index, index, f32, f32) -> (f32, f32) + return +} + +// ----- + func.func @loop_if_not_i1(%arg0: index) { // expected-error@+1 {{operand #0 must be 1-bit signless integer}} "scf.if"(%arg0) ({}, {}) : (index) -> () @@ -422,7 +448,8 @@ func.func @std_for_operands_mismatch_3(%arg0 : index, %arg1 : index, %arg2 : ind func.func @std_for_operands_mismatch_4(%arg0 
: index, %arg1 : index, %arg2 : index) { %s0 = arith.constant 0.0 : f32 %t0 = arith.constant 1.0 : f32 - // expected-error @+1 {{along control flow edge from Region #0 to Region #0: source type #1 'i32' should match input type #1 'f32'}} + // expected-error @below {{1-th region iter_arg and 1-th yielded value have different type: 'f32' != 'i32'}} + // expected-error @below {{along control flow edge from Region #0 to Region #0: source type #1 'i32' should match input type #1 'f32'}} %result1:2 = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %ti = %t0) -> (f32, f32) { %sn = arith.addf %si, %si : f32 @@ -432,7 +459,6 @@ func.func @std_for_operands_mismatch_4(%arg0 : index, %arg1 : index, %arg2 : ind return } - // ----- func.func @parallel_invalid_yield( diff --git a/mlir/test/lib/Dialect/SCF/TestSCFUtils.cpp b/mlir/test/lib/Dialect/SCF/TestSCFUtils.cpp index 1d40615305c02..565d07669792f 100644 --- a/mlir/test/lib/Dialect/SCF/TestSCFUtils.cpp +++ b/mlir/test/lib/Dialect/SCF/TestSCFUtils.cpp @@ -50,9 +50,8 @@ struct TestSCFForUtilsPass auto newInitValues = forOp.getInitArgs(); if (newInitValues.empty()) return; - auto yieldOp = cast(forOp.getBody()->getTerminator()); - SmallVector oldYieldValues(yieldOp.getResults().begin(), - yieldOp.getResults().end()); + SmallVector oldYieldValues = + llvm::to_vector(forOp.getYieldedValues()); NewYieldValuesFn fn = [&](OpBuilder &b, Location loc, ArrayRef newBBArgs) { SmallVector newYieldValues; From 5ae5af1d7c60ac10d91573d251c2d81083cd6ada Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Mon, 16 Oct 2023 09:02:53 +0800 Subject: [PATCH 179/720] [clang-tidy][modernize-loop-convert]check isDependentSizedArrayType (#69062) --- .../clang-tidy/modernize/LoopConvertCheck.cpp | 1 + clang-tools-extra/docs/ReleaseNotes.rst | 3 ++- .../checkers/modernize/loop-convert-basic.cpp | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp 
b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp index f90d99a8d6606..8beaa62c78ba0 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp @@ -753,6 +753,7 @@ void LoopConvertCheck::doConversion( bool IsCheapToCopy = !Descriptor.ElemType.isNull() && Descriptor.ElemType.isTriviallyCopyableType(*Context) && + !Descriptor.ElemType->isDependentSizedArrayType() && // TypeInfo::Width is in bits. Context->getTypeInfo(Descriptor.ElemType).Width <= 8 * MaxCopySize; bool UseCopy = CanCopy && ((VarNameFromAlias && !AliasVarIsRef) || diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index c732d4904df13..af164d0462d52 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -271,7 +271,8 @@ Changes in existing checks - Improved :doc:`modernize-loop-convert ` to support for-loops with - iterators initialized by free functions like ``begin``, ``end``, or ``size``. + iterators initialized by free functions like ``begin``, ``end``, or ``size`` + and avoid a crash for arrays whose element type is a dependent-sized array.
- Improved :doc:`modernize-return-braced-init-list ` check to ignore diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp index 71ae4c46e6a5e..e2b9336d620f5 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp @@ -939,4 +939,18 @@ void fundamentalTypesTest() { // CHECK-FIXES: for (double Double : Doubles) } +template void _dependenceArrayTest() { + unsigned test[3][p]; + for (unsigned i = 0; i < p; ++i) + for (unsigned j = 0; j < 3; ++j) + printf("%d", test[j][i]); + // CHECK-MESSAGES: :[[@LINE-2]]:5: warning: use range-based for loop instead + // CHECK-FIXES: (auto & j : test) + // CHECK-FIXES: printf("%d", j[i]); +} +void dependenceArrayTest() { + _dependenceArrayTest<1>(); + _dependenceArrayTest<2>(); +} + } // namespace PseudoArray From 97c9f9a20af42a6efb3d3912a147cb7f513a9441 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Sun, 15 Oct 2023 17:01:57 -0700 Subject: [PATCH 180/720] [mlir][affine] NFC: Improve variable name in TestReifyValueBounds --- mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp index 2f1631cbdb02e..393e83beb475b 100644 --- a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp +++ b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp @@ -188,11 +188,11 @@ static LogicalResult testEquality(func::FuncOp funcOp) { return WalkResult::skip(); } if (op->hasAttr("compose")) { - FailureOr equal = affine::fullyComposeAndComputeConstantDelta( + FailureOr delta = affine::fullyComposeAndComputeConstantDelta( op->getOperand(0), op->getOperand(1)); - if (failed(equal)) { + if (failed(delta)) { op->emitError("could not determine equality"); - } 
else if (*equal == 0) { + } else if (*delta == 0) { op->emitRemark("equal"); } else { op->emitRemark("different"); From 0ae4622126a2ea66de8f40b9366d486725529a82 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 15 Oct 2023 18:16:06 -0700 Subject: [PATCH 181/720] [RISCV][GISel] Move variadic-call.ll from call-lowering directory to irtranslator. NFC Keeps it consistent with the other call tests. --- .../GlobalISel/{call-lowering => irtranslator}/variadic-call.ll | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/RISCV/GlobalISel/{call-lowering => irtranslator}/variadic-call.ll (100%) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/call-lowering/variadic-call.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/variadic-call.ll similarity index 100% rename from llvm/test/CodeGen/RISCV/GlobalISel/call-lowering/variadic-call.ll rename to llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/variadic-call.ll From 993e839480449de63aefb1a1ae9142eefed5e7a6 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 15 Oct 2023 19:12:35 -0700 Subject: [PATCH 182/720] [Driver] Don't pass -Z to ld for ELF platforms (#69120) -Z is an Apple ld64 option. ELF linkers don't recognize -Z, except OpenBSD which patched GNU ld to add -Z for zmagic (seems unused) > -Z Produce 'Standard' executables, disables Writable XOR Executable features in resulting binaries. Some `ToolChain`s have -Z due to copy-and-paste mistakes. 
--- clang/lib/Driver/ToolChains/BareMetal.cpp | 5 ++--- clang/lib/Driver/ToolChains/CSKYToolChain.cpp | 5 ++--- clang/lib/Driver/ToolChains/FreeBSD.cpp | 5 ++--- clang/lib/Driver/ToolChains/Haiku.cpp | 5 ++--- clang/lib/Driver/ToolChains/MinGW.cpp | 1 - clang/lib/Driver/ToolChains/NetBSD.cpp | 5 ++--- clang/lib/Driver/ToolChains/OpenBSD.cpp | 5 ++--- clang/lib/Driver/ToolChains/RISCVToolchain.cpp | 5 ++--- clang/test/Driver/openbsd.c | 4 ---- 9 files changed, 14 insertions(+), 26 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index f363d277a7b71..842061c1e1488 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -452,9 +452,8 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL"); } - Args.addAllArgs(CmdArgs, - {options::OPT_L, options::OPT_T_Group, options::OPT_s, - options::OPT_t, options::OPT_Z_Flag, options::OPT_r}); + Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, + options::OPT_s, options::OPT_t, options::OPT_r}); TC.AddFilePathLibArgs(Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/CSKYToolChain.cpp b/clang/lib/Driver/ToolChains/CSKYToolChain.cpp index 2bd91e63fdd5a..0c280347b2af6 100644 --- a/clang/lib/Driver/ToolChains/CSKYToolChain.cpp +++ b/clang/lib/Driver/ToolChains/CSKYToolChain.cpp @@ -169,9 +169,8 @@ void CSKY::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_L); ToolChain.AddFilePathLibArgs(Args, CmdArgs); - Args.addAllArgs(CmdArgs, - {options::OPT_T_Group, options::OPT_s, options::OPT_t, - options::OPT_Z_Flag, options::OPT_r}); + Args.addAllArgs(CmdArgs, {options::OPT_T_Group, options::OPT_s, + options::OPT_t, options::OPT_r}); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp 
b/clang/lib/Driver/ToolChains/FreeBSD.cpp index c936fb88d18cc..7a61159ba4a73 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -262,9 +262,8 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_L); ToolChain.AddFilePathLibArgs(Args, CmdArgs); - Args.addAllArgs(CmdArgs, - {options::OPT_T_Group, options::OPT_s, options::OPT_t, - options::OPT_Z_Flag, options::OPT_r}); + Args.addAllArgs(CmdArgs, {options::OPT_T_Group, options::OPT_s, + options::OPT_t, options::OPT_r}); if (D.isUsingLTO()) { assert(!Inputs.empty() && "Must have at least one input."); diff --git a/clang/lib/Driver/ToolChains/Haiku.cpp b/clang/lib/Driver/ToolChains/Haiku.cpp index c2653a4a2022e..9f56a0ea5d612 100644 --- a/clang/lib/Driver/ToolChains/Haiku.cpp +++ b/clang/lib/Driver/ToolChains/Haiku.cpp @@ -80,9 +80,8 @@ void haiku::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("init_term_dyn.o"))); } - Args.addAllArgs(CmdArgs, - {options::OPT_L, options::OPT_T_Group, options::OPT_s, - options::OPT_t, options::OPT_Z_Flag, options::OPT_r}); + Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, + options::OPT_s, options::OPT_t, options::OPT_r}); ToolChain.AddFilePathLibArgs(Args, CmdArgs); addLinkerCompressDebugSectionsOption(ToolChain, Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index d3d829a8ddbdb..39d767795445d 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -201,7 +201,6 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_s); Args.AddLastArg(CmdArgs, options::OPT_t); Args.AddAllArgs(CmdArgs, options::OPT_u_Group); - Args.AddLastArg(CmdArgs, options::OPT_Z_Flag); // Add asan_dynamic as the first import lib before other libs. 
This allows // asan to be initialized as early as possible to increase its instrumentation diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp index 316e4d56c242a..1c901f70f72ca 100644 --- a/clang/lib/Driver/ToolChains/NetBSD.cpp +++ b/clang/lib/Driver/ToolChains/NetBSD.cpp @@ -266,9 +266,8 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } - Args.addAllArgs(CmdArgs, - {options::OPT_L, options::OPT_T_Group, options::OPT_s, - options::OPT_t, options::OPT_Z_Flag, options::OPT_r}); + Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, + options::OPT_s, options::OPT_t, options::OPT_r}); bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index 5a9a8584cccb2..2508ef57f827c 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -195,9 +195,8 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_L); ToolChain.AddFilePathLibArgs(Args, CmdArgs); - Args.addAllArgs(CmdArgs, - {options::OPT_T_Group, options::OPT_s, options::OPT_t, - options::OPT_Z_Flag, options::OPT_r}); + Args.addAllArgs(CmdArgs, {options::OPT_T_Group, options::OPT_s, + options::OPT_t, options::OPT_r}); bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp index c98f43f6e05eb..7e6abd1444287 100644 --- a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp +++ b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp @@ -193,9 +193,8 @@ void RISCV::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_u}); 
ToolChain.AddFilePathLibArgs(Args, CmdArgs); - Args.addAllArgs(CmdArgs, - {options::OPT_T_Group, options::OPT_s, options::OPT_t, - options::OPT_Z_Flag, options::OPT_r}); + Args.addAllArgs(CmdArgs, {options::OPT_T_Group, options::OPT_s, + options::OPT_t, options::OPT_r}); // TODO: add C++ includes and libs if compiling C++. diff --git a/clang/test/Driver/openbsd.c b/clang/test/Driver/openbsd.c index 05d290a309c40..c84b54f24fdc2 100644 --- a/clang/test/Driver/openbsd.c +++ b/clang/test/Driver/openbsd.c @@ -30,8 +30,6 @@ // RUN: | FileCheck --check-prefix=CHECK-LD-S %s // RUN: %clang --target=i686-pc-openbsd -t -### %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-LD-T %s -// RUN: %clang --target=i686-pc-openbsd -Z -### %s 2>&1 \ -// RUN: | FileCheck --check-prefix=CHECK-LD-Z %s // RUN: %clang --target=mips64-unknown-openbsd -### %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-MIPS64-LD %s // RUN: %clang --target=mips64el-unknown-openbsd -### %s 2>&1 \ @@ -44,8 +42,6 @@ // CHECK-LD-S: ld{{.*}}" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-o" "a.out" "{{.*}}crt0.o" "{{.*}}crtbegin.o" "-L{{.*}}" "-s" "{{.*}}.o" "-lcompiler_rt" "-lc" "-lcompiler_rt" "{{.*}}crtend.o" // CHECK-LD-T: "-cc1" "-triple" "i686-pc-openbsd" // CHECK-LD-T: ld{{.*}}" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-o" "a.out" "{{.*}}crt0.o" "{{.*}}crtbegin.o" "-L{{.*}}" "-t" "{{.*}}.o" "-lcompiler_rt" "-lc" "-lcompiler_rt" "{{.*}}crtend.o" -// CHECK-LD-Z: "-cc1" "-triple" "i686-pc-openbsd" -// CHECK-LD-Z: ld{{.*}}" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-o" "a.out" "{{.*}}crt0.o" "{{.*}}crtbegin.o" "-L{{.*}}" "-Z" "{{.*}}.o" "-lcompiler_rt" "-lc" "-lcompiler_rt" "{{.*}}crtend.o" // CHECK-MIPS64-LD: "-cc1" "-triple" "mips64-unknown-openbsd" // CHECK-MIPS64-LD: ld{{.*}}" "-EB" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-o" "a.out" "{{.*}}crt0.o" "{{.*}}crtbegin.o" 
"-L{{.*}}" "{{.*}}.o" "-lcompiler_rt" "-lc" "-lcompiler_rt" "{{.*}}crtend.o" // CHECK-MIPS64EL-LD: "-cc1" "-triple" "mips64el-unknown-openbsd" From 819ac45d1c1b7a2d784b2606c84de46ce714f278 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Sun, 15 Oct 2023 19:12:53 -0700 Subject: [PATCH 183/720] [X86] Add USER_MSR instructions. (#68944) For more details about this instruction, please refer to the latest ISE document: https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html --- clang/docs/ReleaseNotes.rst | 3 + clang/include/clang/Basic/BuiltinsX86_64.def | 3 + clang/include/clang/Driver/Options.td | 2 + clang/lib/Basic/Targets/X86.cpp | 6 ++ clang/lib/Basic/Targets/X86.h | 1 + clang/lib/Headers/CMakeLists.txt | 1 + clang/lib/Headers/usermsrintrin.h | 30 +++++++++ clang/lib/Headers/x86gprintrin.h | 5 ++ .../CodeGen/X86/usermsr-builtins-error-32.c | 14 ++++ clang/test/CodeGen/X86/usermsr-builtins.c | 29 +++++++++ clang/test/Driver/x86-target-features.c | 5 ++ clang/test/Preprocessor/x86_target_features.c | 6 ++ llvm/docs/ReleaseNotes.rst | 1 + llvm/include/llvm/IR/IntrinsicsX86.td | 10 ++- .../Support/X86DisassemblerDecoderCommon.h | 5 +- .../llvm/TargetParser/X86TargetParser.def | 1 + .../X86/Disassembler/X86Disassembler.cpp | 9 +++ .../X86/Disassembler/X86DisassemblerDecoder.h | 3 +- .../lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 3 +- .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 4 ++ llvm/lib/Target/X86/X86.td | 2 + llvm/lib/Target/X86/X86InstrFormats.td | 4 ++ llvm/lib/Target/X86/X86InstrInfo.td | 1 + llvm/lib/Target/X86/X86InstrSystem.td | 16 +++++ llvm/lib/TargetParser/Host.cpp | 1 + llvm/lib/TargetParser/X86TargetParser.cpp | 1 + llvm/test/CodeGen/X86/usermsr-intrinsics.ll | 64 +++++++++++++++++++ llvm/test/MC/Disassembler/X86/usermsr-64.txt | 28 ++++++++ llvm/test/MC/X86/usermsr-64-att.s | 18 ++++++ llvm/test/MC/X86/usermsr-64-intel.s | 18 ++++++ llvm/utils/TableGen/X86DisassemblerTables.cpp 
| 1 + llvm/utils/TableGen/X86DisassemblerTables.h | 3 +- llvm/utils/TableGen/X86RecognizableInstr.cpp | 1 + llvm/utils/TableGen/X86RecognizableInstr.h | 2 +- 34 files changed, 295 insertions(+), 6 deletions(-) create mode 100644 clang/lib/Headers/usermsrintrin.h create mode 100644 clang/test/CodeGen/X86/usermsr-builtins-error-32.c create mode 100644 clang/test/CodeGen/X86/usermsr-builtins.c create mode 100644 llvm/test/CodeGen/X86/usermsr-intrinsics.ll create mode 100644 llvm/test/MC/Disassembler/X86/usermsr-64.txt create mode 100644 llvm/test/MC/X86/usermsr-64-att.s create mode 100644 llvm/test/MC/X86/usermsr-64-intel.s diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6d315e9f84ddf..52d5b9a3f66d1 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -551,6 +551,9 @@ X86 Support - Added option ``-m[no-]evex512`` to disable ZMM and 64-bit mask instructions for AVX512 features. +- Support ISA of ``USER_MSR``. + * Support intrinsic of ``_urdmsr``. + * Support intrinsic of ``_uwrmsr``. 
Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def index e5c1fe8b31921..5e00916d4b25a 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.def +++ b/clang/include/clang/Basic/BuiltinsX86_64.def @@ -104,6 +104,9 @@ TARGET_BUILTIN(__builtin_ia32_clui, "v", "n", "uintr") TARGET_BUILTIN(__builtin_ia32_stui, "v", "n", "uintr") TARGET_BUILTIN(__builtin_ia32_testui, "Uc", "n", "uintr") TARGET_BUILTIN(__builtin_ia32_senduipi, "vUWi", "n", "uintr") +// USERMSR +TARGET_BUILTIN(__builtin_ia32_urdmsr, "ULLiULLi", "n", "usermsr") +TARGET_BUILTIN(__builtin_ia32_uwrmsr, "vULLiULLi", "n", "usermsr") // AMX internal builtin TARGET_BUILTIN(__builtin_ia32_tile_loadconfig_internal, "vvC*", "n", "amx-tile") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 54afd652ad3d0..640044622fc09 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5904,6 +5904,8 @@ def mtsxldtrk : Flag<["-"], "mtsxldtrk">, Group; def mno_tsxldtrk : Flag<["-"], "mno-tsxldtrk">, Group; def muintr : Flag<["-"], "muintr">, Group; def mno_uintr : Flag<["-"], "mno-uintr">, Group; +def musermsr : Flag<["-"], "musermsr">, Group; +def mno_usermsr : Flag<["-"], "mno-usermsr">, Group; def mvaes : Flag<["-"], "mvaes">, Group; def mno_vaes : Flag<["-"], "mno-vaes">, Group; def mvpclmulqdq : Flag<["-"], "mvpclmulqdq">, Group; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 022d5753135e1..bea5c52a7b8d7 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -376,6 +376,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasTSXLDTRK = true; } else if (Feature == "+uintr") { HasUINTR = true; + } else if (Feature == "+usermsr") { + HasUSERMSR = true; } else if (Feature == "+crc32") { HasCRC32 = true; } else if (Feature == "+x87") { @@ -869,6 +871,8 @@ void 
X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__TSXLDTRK__"); if (HasUINTR) Builder.defineMacro("__UINTR__"); + if (HasUSERMSR) + Builder.defineMacro("__USERMSR__"); if (HasCRC32) Builder.defineMacro("__CRC32__"); @@ -1053,6 +1057,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("tbm", true) .Case("tsxldtrk", true) .Case("uintr", true) + .Case("usermsr", true) .Case("vaes", true) .Case("vpclmulqdq", true) .Case("wbnoinvd", true) @@ -1162,6 +1167,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("tbm", HasTBM) .Case("tsxldtrk", HasTSXLDTRK) .Case("uintr", HasUINTR) + .Case("usermsr", HasUSERMSR) .Case("vaes", HasVAES) .Case("vpclmulqdq", HasVPCLMULQDQ) .Case("wbnoinvd", HasWBNOINVD) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 4fdc94de1e0cb..298db55c67442 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -162,6 +162,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasAMXCOMPLEX = false; bool HasSERIALIZE = false; bool HasTSXLDTRK = false; + bool HasUSERMSR = false; bool HasUINTR = false; bool HasCRC32 = false; bool HasX87 = false; diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 8deea823e3966..3b6fec3da2b16 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -224,6 +224,7 @@ set(x86_files tmmintrin.h tsxldtrkintrin.h uintrintrin.h + usermsrintrin.h vaesintrin.h vpclmulqdqintrin.h waitpkgintrin.h diff --git a/clang/lib/Headers/usermsrintrin.h b/clang/lib/Headers/usermsrintrin.h new file mode 100644 index 0000000000000..6d1424ad3b2ed --- /dev/null +++ b/clang/lib/Headers/usermsrintrin.h @@ -0,0 +1,30 @@ +/*===--------------- usermsrintrin.h - USERMSR intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __X86GPRINTRIN_H +#error "Never use directly; include instead." +#endif // __X86GPRINTRIN_H + +#ifndef __USERMSRINTRIN_H +#define __USERMSRINTRIN_H +#ifdef __x86_64__ + +static __inline__ unsigned long long + __attribute__((__always_inline__, __nodebug__, __target__("usermsr"))) + _urdmsr(unsigned long long __A) { + return __builtin_ia32_urdmsr(__A); +} + +static __inline__ void + __attribute__((__always_inline__, __nodebug__, __target__("usermsr"))) + _uwrmsr(unsigned long long __A, unsigned long long __B) { + return __builtin_ia32_uwrmsr(__A, __B); +} + +#endif // __x86_64__ +#endif // __USERMSRINTRIN_H diff --git a/clang/lib/Headers/x86gprintrin.h b/clang/lib/Headers/x86gprintrin.h index f9a765be43221..ed141879fbc74 100644 --- a/clang/lib/Headers/x86gprintrin.h +++ b/clang/lib/Headers/x86gprintrin.h @@ -20,6 +20,11 @@ #include #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__USERMSR__) +#include +#endif + #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CRC32__) #include diff --git a/clang/test/CodeGen/X86/usermsr-builtins-error-32.c b/clang/test/CodeGen/X86/usermsr-builtins-error-32.c new file mode 100644 index 0000000000000..180b99a4212a1 --- /dev/null +++ b/clang/test/CodeGen/X86/usermsr-builtins-error-32.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +usermsr \ +// RUN: -emit-llvm -fsyntax-only -verify + +#include + +unsigned long long test_urdmsr(unsigned long long __A) { + return _urdmsr(__A); // expected-error {{call to undeclared function '_urdmsr'}} +} + +void test_uwrmsr(unsigned long long __A, unsigned long long __B) { + // CHECK-LABEL: @test_uwrmsr( + // CHECK: call void @llvm.x86.uwrmsr( + _uwrmsr(__A, __B); // expected-error {{call to undeclared function 
'_uwrmsr'}} +} diff --git a/clang/test/CodeGen/X86/usermsr-builtins.c b/clang/test/CodeGen/X86/usermsr-builtins.c new file mode 100644 index 0000000000000..0d58bc98c204c --- /dev/null +++ b/clang/test/CodeGen/X86/usermsr-builtins.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +usermsr \ +// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s + +#include + +unsigned long long test_urdmsr(unsigned long long __A) { + // CHECK-LABEL: @test_urdmsr( + // CHECK: call i64 @llvm.x86.urdmsr( + return _urdmsr(__A); +} + +unsigned long long test_urdmsr_const(unsigned long long __A) { + // CHECK-LABEL: @test_urdmsr_const( + // CHECK: call i64 @llvm.x86.urdmsr( + return _urdmsr(123u); +} + +void test_uwrmsr(unsigned long long __A, unsigned long long __B) { + // CHECK-LABEL: @test_uwrmsr( + // CHECK: call void @llvm.x86.uwrmsr( + _uwrmsr(__A, __B); +} + +void test_uwrmsr_const(unsigned long long __A, unsigned long long __B) { + // CHECK-LABEL: @test_uwrmsr_const( + // CHECK: call void @llvm.x86.uwrmsr( + _uwrmsr(123u, __B); +} + diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index a6ecedbb8a58e..464dcda504bbd 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -374,6 +374,11 @@ // EVEX512: "-target-feature" "+evex512" // NO-EVEX512: "-target-feature" "-evex512" +// RUN: %clang --target=i386 -musermsr %s -### -o %t.o 2>&1 | FileCheck -check-prefix=USERMSR %s +// RUN: %clang --target=i386 -mno-usermsr %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-USERMSR %s +// USERMSR: "-target-feature" "+usermsr" +// NO-USERMSR: "-target-feature" "-usermsr" + // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s // CRC32: "-target-feature" "+crc32" diff --git 
a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 36d4af59d4c66..873416d79b125 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -750,6 +750,12 @@ // AVXVNNIINT16NOAVX2-NOT: #define __AVX2__ 1 // AVXVNNIINT16NOAVX2-NOT: #define __AVXVNNIINT16__ 1 +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -musermsr -x c -E -dM -o - %s | FileCheck -check-prefix=USERMSR %s +// USERMSR: #define __USERMSR__ 1 + +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-usermsr -x c -E -dM -o - %s | FileCheck -check-prefix=NO-USERMSR %s +// NO-USERMSR-NOT: #define __USERMSR__ 1 + // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s // CRC32: #define __CRC32__ 1 diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 467b4b5320ad9..94b43800c17bd 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -133,6 +133,7 @@ Changes to the X86 Backend benefits external projects such as Rust which aim to be binary compatible with C, but also fixes code generation where LLVM already assumed that the type matched and called into libgcc helper functions. +* Support ISA of ``USER_MSR``. 
Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 57cd1dc47bd9f..fdc2b0fb7f80f 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -5673,8 +5673,16 @@ let TargetPrefix = "x86" in { Intrinsic<[], [llvm_i64_ty], []>; } +let TargetPrefix = "x86" in { +def int_x86_urdmsr : ClangBuiltin<"__builtin_ia32_urdmsr">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty], + [IntrInaccessibleMemOnly]>; +def int_x86_uwrmsr : ClangBuiltin<"__builtin_ia32_uwrmsr">, + Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], + [IntrInaccessibleMemOnly]>; +} + //===----------------------------------------------------------------------===// -// avx512_fp16: vaddph let TargetPrefix = "x86" in { def int_x86_avx512fp16_add_ph_512 : ClangBuiltin<"__builtin_ia32_addph512">, diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h index 169b8e97986e1..6e08fc6a0ccb6 100644 --- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h +++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -33,6 +33,7 @@ namespace X86Disassembler { #define THREEDNOW_MAP_SYM x86Disassembler3DNowOpcodes #define MAP5_SYM x86DisassemblerMap5Opcodes #define MAP6_SYM x86DisassemblerMap6Opcodes +#define MAP7_SYM x86DisassemblerMap7Opcodes #define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers" #define CONTEXTS_STR "x86DisassemblerContexts" @@ -46,6 +47,7 @@ namespace X86Disassembler { #define THREEDNOW_MAP_STR "x86Disassembler3DNowOpcodes" #define MAP5_STR "x86DisassemblerMap5Opcodes" #define MAP6_STR "x86DisassemblerMap6Opcodes" +#define MAP7_STR "x86DisassemblerMap7Opcodes" // Attributes of an instruction that must be known before the opcode can be // processed correctly. 
Most of these indicate the presence of particular @@ -296,7 +298,8 @@ enum OpcodeType { XOPA_MAP = 6, THREEDNOW_MAP = 7, MAP5 = 8, - MAP6 = 9 + MAP6 = 9, + MAP7 = 10 }; // The following structs are used for the hierarchical decode table. After diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index 85ff6996d335a..709ff8603b042 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -241,6 +241,7 @@ X86_FEATURE (SM3, "sm3") X86_FEATURE (SM4, "sm4") X86_FEATURE (AVXVNNIINT16, "avxvnniint16") X86_FEATURE (EVEX512, "evex512") +X86_FEATURE (USERMSR, "usermsr") // These features aren't really CPU features, but the frontend can set them. X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches") diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 967c7574355db..2ec7a57093f4b 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -156,6 +156,9 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext, case MAP6: dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; break; + case MAP7: + dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; } switch (dec->modrm_type) { @@ -918,6 +921,9 @@ static bool readOpcode(struct InternalInstruction *insn) { case VEX_LOB_MAP6: insn->opcodeType = MAP6; return consume(insn, insn->opcode); + case VEX_LOB_MAP7: + insn->opcodeType = MAP7; + return consume(insn, insn->opcode); } } else if (insn->vectorExtensionType == TYPE_VEX_2B) { insn->opcodeType = TWOBYTE; @@ -1059,6 +1065,9 @@ static int getInstructionIDWithAttrMask(uint16_t *instructionID, case MAP6: decision = &MAP6_SYM; break; + case MAP7: + decision = &MAP7_SYM; + break; 
} if (decision->opcodeDecisions[insnCtx] diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 95d3c8ede366f..2d728143d3c9a 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -484,7 +484,8 @@ enum VEXLeadingOpcodeByte { VEX_LOB_0F38 = 0x2, VEX_LOB_0F3A = 0x3, VEX_LOB_MAP5 = 0x5, - VEX_LOB_MAP6 = 0x6 + VEX_LOB_MAP6 = 0x6, + VEX_LOB_MAP7 = 0x7 }; enum XOPMapSelect { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index e2293fe30561f..1e5a3606f33a6 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -829,9 +829,10 @@ namespace X86II { /// this flag to indicate that the encoder should do the wacky 3DNow! thing. ThreeDNow = 7 << OpMapShift, - // MAP5, MAP6 - Prefix after the 0x0F prefix. + // MAP5, MAP6, MAP7 - Prefix after the 0x0F prefix. T_MAP5 = 8 << OpMapShift, T_MAP6 = 9 << OpMapShift, + T_MAP7 = 10 << OpMapShift, //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 59a04f3167d86..b85404be3063d 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -89,6 +89,7 @@ class X86OpcodePrefixHelper { // 0b00100: Reserved for future use // 0b00101: VEX MAP5 // 0b00110: VEX MAP6 + // 0b00111: VEX MAP7 // 0b00111-0b11111: Reserved for future use // 0b01000: XOP map select - 08h instructions with imm byte // 0b01001: XOP map select - 09h instructions with no imm byte @@ -917,6 +918,9 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, case X86II::T_MAP6: Prefix.set5M(0x6); break; + case X86II::T_MAP7: + Prefix.set5M(0x7); + break; } Prefix.setL(TSFlags & X86II::VEX_L); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 64f91ae90e2b0..f3f8d5718dfc2 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -325,6 +325,8 @@ def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true", "Support TSXLDTRK instructions">; def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true", "Has UINTR Instructions">; +def FeatureUSERMSR : SubtargetFeature<"usermsr", "HasUSERMSR", "true", + "Support USERMSR instructions">; def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true", "platform configuration instruction">; def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true", diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index f45869e15267c..70ffd4175e1f1 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -163,6 +163,7 @@ def XOPA : Map<6>; def ThreeDNow : Map<7>; def T_MAP5 : Map<8>; def T_MAP6 : Map<9>; +def T_MAP7 : Map<10>; // Class specifying the encoding class Encoding val> { @@ -217,6 +218,9 @@ class T_MAP6PS : T_MAP6 { Prefix OpPrefix = PS; } class 
T_MAP6PD : T_MAP6 { Prefix OpPrefix = PD; } class T_MAP6XS : T_MAP6 { Prefix OpPrefix = XS; } class T_MAP6XD : T_MAP6 { Prefix OpPrefix = XD; } +class T_MAP7 { Map OpMap = T_MAP7; } +class T_MAP7XS : T_MAP7 { Prefix OpPrefix = XS; } // 0xF3 +class T_MAP7XD : T_MAP7 { Prefix OpPrefix = XD; } // 0xF2 class OBXS { Prefix OpPrefix = XS; } class PS : TB { Prefix OpPrefix = PS; } class PD : TB { Prefix OpPrefix = PD; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index a20fa6a0c3b6c..cb740bc99f788 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -1017,6 +1017,7 @@ def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">; def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">; def HasAMXCOMPLEX : Predicate<"Subtarget->hasAMXCOMPLEX()">; def HasUINTR : Predicate<"Subtarget->hasUINTR()">; +def HasUSERMSR : Predicate<"Subtarget->hasUSERMSR()">; def HasCRC32 : Predicate<"Subtarget->hasCRC32()">; def HasX86_64 : Predicate<"Subtarget->hasX86_64()">; diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index 0272f7de0f9e4..b55956169ff2c 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -436,6 +436,22 @@ def WRMSRLIST : I<0x01, MRM_C6, (outs), (ins), "wrmsrlist", []>, XS; def RDMSRLIST : I<0x01, MRM_C6, (outs), (ins), "rdmsrlist", []>, XD; } +let Predicates = [HasUSERMSR], mayLoad = 1 in { + def URDMSRrr : I<0xf8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "urdmsr\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (int_x86_urdmsr GR64:$src))]>, T8XD; + def URDMSRri : Ii32<0xf8, MRM0r, (outs GR64:$dst), (ins i64i32imm:$imm), + "urdmsr\t{$imm, $dst|$dst, $imm}", + [(set GR64:$dst, (int_x86_urdmsr i64immSExt32_su:$imm))]>, T_MAP7XD, VEX; +} +let Predicates = [HasUSERMSR], mayStore = 1 in { + def UWRMSRrr : I<0xf8, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), + "uwrmsr\t{$src1, $src2|$src2, $src1}", + 
[(int_x86_uwrmsr GR64:$src1, GR64:$src2)]>, T8XS; + def UWRMSRir : Ii32<0xf8, MRM0r, (outs), (ins GR64:$src, i64i32imm:$imm), + "uwrmsr\t{$src, $imm|$imm, $src}", + [(int_x86_uwrmsr GR64:$src, i64immSExt32_su:$imm)]>, T_MAP7XS, VEX; +} let Defs = [RAX, RDX], Uses = [ECX] in def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB; diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 436a5eb04c8d3..b320911d3ce27 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1796,6 +1796,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave; Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave; Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1); + Features["usermsr"] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1); bool HasLeafD = MaxLevel >= 0xd && !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index b9908dd2629ff..94849f915daa1 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -509,6 +509,7 @@ constexpr FeatureBitset ImpliedFeaturesSHSTK = {}; constexpr FeatureBitset ImpliedFeaturesTBM = {}; constexpr FeatureBitset ImpliedFeaturesTSXLDTRK = {}; constexpr FeatureBitset ImpliedFeaturesUINTR = {}; +constexpr FeatureBitset ImpliedFeaturesUSERMSR = {}; constexpr FeatureBitset ImpliedFeaturesWAITPKG = {}; constexpr FeatureBitset ImpliedFeaturesWBNOINVD = {}; constexpr FeatureBitset ImpliedFeaturesVZEROUPPER = {}; diff --git a/llvm/test/CodeGen/X86/usermsr-intrinsics.ll b/llvm/test/CodeGen/X86/usermsr-intrinsics.ll new file mode 100644 index 0000000000000..29801a494f498 --- /dev/null +++ b/llvm/test/CodeGen/X86/usermsr-intrinsics.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+usermsr | FileCheck %s --check-prefixes=X64 + +define i64 @test_int_x86_urdmsr(i64 %A) nounwind { +; X64-LABEL: test_int_x86_urdmsr: +; X64: # %bb.0: +; X64-NEXT: urdmsr %rdi, %rax # encoding: [0xf2,0x0f,0x38,0xf8,0xc7] +; X64-NEXT: retq # encoding: [0xc3] + %ret = call i64 @llvm.x86.urdmsr(i64 %A) + ret i64 %ret +} + +define i64 @test_int_x86_urdmsr_const() nounwind { +; X64-LABEL: test_int_x86_urdmsr_const: +; X64: # %bb.0: +; X64-NEXT: urdmsr $123, %rax # encoding: [0xc4,0xe7,0x7b,0xf8,0xc0,0x7b,0x00,0x00,0x00] +; X64-NEXT: retq # encoding: [0xc3] + %ret = call i64 @llvm.x86.urdmsr(i64 123) + ret i64 %ret +} + +define i64 @test_int_x86_urdmsr_const_i64() nounwind { +; X64-LABEL: test_int_x86_urdmsr_const_i64: +; X64: # %bb.0: +; X64-NEXT: movabsq $8589934591, %rax # encoding: [0x48,0xb8,0xff,0xff,0xff,0xff,0x01,0x00,0x00,0x00] +; X64-NEXT: # imm = 0x1FFFFFFFF +; X64-NEXT: urdmsr %rax, %rax # encoding: [0xf2,0x0f,0x38,0xf8,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %ret = call i64 @llvm.x86.urdmsr(i64 8589934591) + ret i64 %ret +} + +declare i64 @llvm.x86.urdmsr(i64 %A) + +define void @test_int_x86_uwrmsr(i64 %A, i64 %B) nounwind { +; X64-LABEL: test_int_x86_uwrmsr: +; X64: # %bb.0: +; X64-NEXT: uwrmsr %rdi, %rsi # encoding: [0xf3,0x0f,0x38,0xf8,0xfe] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.uwrmsr(i64 %A, i64 %B) + ret void +} + +define void @test_int_x86_uwrmsr_const(i64 %A) nounwind { +; X64-LABEL: test_int_x86_uwrmsr_const: +; X64: # %bb.0: +; X64-NEXT: uwrmsr %rdi, $123 # encoding: [0xc4,0xe7,0x7a,0xf8,0xc7,0x7b,0x00,0x00,0x00] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.uwrmsr(i64 %A, i64 123) + ret void +} + +define void @test_int_x86_uwrmsr_const_i64(i64 %A) nounwind { +; X64-LABEL: test_int_x86_uwrmsr_const_i64: +; X64: # %bb.0: +; X64-NEXT: movabsq $8589934591, %rax # encoding: 
[0x48,0xb8,0xff,0xff,0xff,0xff,0x01,0x00,0x00,0x00] +; X64-NEXT: # imm = 0x1FFFFFFFF +; X64-NEXT: uwrmsr %rdi, %rax # encoding: [0xf3,0x0f,0x38,0xf8,0xf8] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.uwrmsr(i64 %A, i64 8589934591) + ret void +} + +declare void @llvm.x86.uwrmsr(i64 %A, i64 %B) diff --git a/llvm/test/MC/Disassembler/X86/usermsr-64.txt b/llvm/test/MC/Disassembler/X86/usermsr-64.txt new file mode 100644 index 0000000000000..592a1a204f5c6 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/usermsr-64.txt @@ -0,0 +1,28 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: urdmsr $123, %r9 +# INTEL: urdmsr r9, 123 +0xc4,0xc7,0x7b,0xf8,0xc1,0x7b,0x00,0x00,0x00 + +# ATT: urdmsr %r9, %r9 +# INTEL: urdmsr r9, r9 +0xf2,0x45,0x0f,0x38,0xf8,0xc9 + +# Test if WIG is supported for this instruction/form. +# ATT: urdmsr %r9, %r9 +# INTEL: urdmsr r9, r9 +0xf2,0x4d,0x0f,0x38,0xf8,0xc9 + +# ATT: uwrmsr %r9, $123 +# INTEL: uwrmsr 123, r9 +0xc4,0xc7,0x7a,0xf8,0xc1,0x7b,0x00,0x00,0x00 + +# ATT: uwrmsr %r9, %r9 +# INTEL: uwrmsr r9, r9 +0xf3,0x45,0x0f,0x38,0xf8,0xc9 + +# Test if WIG is supported for this instruction/form. 
+# ATT: uwrmsr %r9, %r9 +# INTEL: uwrmsr r9, r9 +0xf3,0x4d,0x0f,0x38,0xf8,0xc9 diff --git a/llvm/test/MC/X86/usermsr-64-att.s b/llvm/test/MC/X86/usermsr-64-att.s new file mode 100644 index 0000000000000..e89d0a800ab0b --- /dev/null +++ b/llvm/test/MC/X86/usermsr-64-att.s @@ -0,0 +1,18 @@ +// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s + +// CHECK: urdmsr $123, %r9 +// CHECK: encoding: [0xc4,0xc7,0x7b,0xf8,0xc1,0x7b,0x00,0x00,0x00] + urdmsr $123, %r9 + +// CHECK: urdmsr %r9, %r9 +// CHECK: encoding: [0xf2,0x45,0x0f,0x38,0xf8,0xc9] + urdmsr %r9, %r9 + +// CHECK: uwrmsr %r9, $123 +// CHECK: encoding: [0xc4,0xc7,0x7a,0xf8,0xc1,0x7b,0x00,0x00,0x00] + uwrmsr %r9, $123 + +// CHECK: uwrmsr %r9, %r9 +// CHECK: encoding: [0xf3,0x45,0x0f,0x38,0xf8,0xc9] + uwrmsr %r9, %r9 + diff --git a/llvm/test/MC/X86/usermsr-64-intel.s b/llvm/test/MC/X86/usermsr-64-intel.s new file mode 100644 index 0000000000000..13d9161080af4 --- /dev/null +++ b/llvm/test/MC/X86/usermsr-64-intel.s @@ -0,0 +1,18 @@ +// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: urdmsr r9, 123 +// CHECK: encoding: [0xc4,0xc7,0x7b,0xf8,0xc1,0x7b,0x00,0x00,0x00] + urdmsr r9, 123 + +// CHECK: urdmsr r9, r9 +// CHECK: encoding: [0xf2,0x45,0x0f,0x38,0xf8,0xc9] + urdmsr r9, r9 + +// CHECK: uwrmsr 123, r9 +// CHECK: encoding: [0xc4,0xc7,0x7a,0xf8,0xc1,0x7b,0x00,0x00,0x00] + uwrmsr 123, r9 + +// CHECK: uwrmsr r9, r9 +// CHECK: encoding: [0xf3,0x45,0x0f,0x38,0xf8,0xc9] + uwrmsr r9, r9 + diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp index 708c92aecfc85..ba51bf4858e19 100644 --- a/llvm/utils/TableGen/X86DisassemblerTables.cpp +++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp @@ -982,6 +982,7 @@ void DisassemblerTables::emitContextDecisions(raw_ostream &o1, raw_ostream &o2, emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[7], THREEDNOW_MAP_STR); emitContextDecision(o1, 
o2, i1, i2, ModRMTableNum, *Tables[8], MAP5_STR); emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[9], MAP6_STR); + emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[10], MAP7_STR); } void DisassemblerTables::emit(raw_ostream &o) const { diff --git a/llvm/utils/TableGen/X86DisassemblerTables.h b/llvm/utils/TableGen/X86DisassemblerTables.h index 966f7406efec1..4b6f6543acccf 100644 --- a/llvm/utils/TableGen/X86DisassemblerTables.h +++ b/llvm/utils/TableGen/X86DisassemblerTables.h @@ -46,7 +46,8 @@ class DisassemblerTables { /// [7] 3dnow map opcode /// [8] fixed length MAP5 opcode /// [9] fixed length MAP6 opcode - std::unique_ptr Tables[10]; + /// [10] fixed length MAP7 opcode + std::unique_ptr Tables[11]; // Table of ModRM encodings. typedef std::map, unsigned> ModRMMapTy; diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp index b2f51ba016899..962da623b1cad 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -791,6 +791,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::ThreeDNow: opcodeType = THREEDNOW_MAP; break; case X86Local::T_MAP5: opcodeType = MAP5; break; case X86Local::T_MAP6: opcodeType = MAP6; break; + case X86Local::T_MAP7: opcodeType = MAP7; break; } std::unique_ptr filter; diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h index 5efacdb27465b..38bca87bfe614 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.h +++ b/llvm/utils/TableGen/X86RecognizableInstr.h @@ -137,7 +137,7 @@ namespace X86Local { enum { OB = 0, TB = 1, T8 = 2, TA = 3, XOP8 = 4, XOP9 = 5, XOPA = 6, ThreeDNow = 7, - T_MAP5 = 8, T_MAP6 = 9 + T_MAP5 = 8, T_MAP6 = 9, T_MAP7 = 10 }; enum { From 6121b9088ef0d9769d1939214537defbcdf57df2 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 16 Oct 2023 02:16:00 +0000 Subject: [PATCH 184/720] [gn build] Port 
819ac45d1c1b --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 9fe2dda6f2a35..c227d81162838 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -273,6 +273,7 @@ copy("Headers") { "tsxldtrkintrin.h", "uintrintrin.h", "unwind.h", + "usermsrintrin.h", "vadefs.h", "vaesintrin.h", "varargs.h", From be72dca5e3ab3301e6927aca1c0823e382519bb3 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 16 Oct 2023 10:37:54 +0800 Subject: [PATCH 185/720] [docs] [C++20] [Modules] Mentioning that -fdelayed-template-parsing is not working with modules Caught in https://github.com/llvm/llvm-project/issues/61068. Add this to the document to avoid further misunderstandings. --- clang/docs/StandardCPlusPlusModules.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/clang/docs/StandardCPlusPlusModules.rst b/clang/docs/StandardCPlusPlusModules.rst index 579431bd9aa32..8dd86edc64a80 100644 --- a/clang/docs/StandardCPlusPlusModules.rst +++ b/clang/docs/StandardCPlusPlusModules.rst @@ -686,6 +686,15 @@ the BMI within ``clang-cl.exe``. This is tracked in: https://github.com/llvm/llvm-project/issues/64118 +delayed template parsing is not supported/broken with C++ modules +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The feature `-fdelayed-template-parsing` can't work well with C++ modules now. +Note that this is significant on Windows since the option will be enabled by default +on Windows. 
+ +This is tracked in: https://github.com/llvm/llvm-project/issues/61068 + Header Units ============ From 7fb2b4d7f55afe69aa8ea5d14d7cbdeeceac3b5e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 15 Oct 2023 20:40:47 -0700 Subject: [PATCH 186/720] [CodeGen] Remove unused declaration createJumpInstrTablesPass The corresponding function definition was removed by: commit 3b94e33277beebd8ec3e654702d4fa912803115d Author: Eric Christopher Date: Fri Feb 27 19:03:38 2015 +0000 --- llvm/include/llvm/CodeGen/Passes.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index befa8a6eb9a27..598c0b838c1b9 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -446,9 +446,6 @@ namespace llvm { /// LiveDebugValues pass extern char &LiveDebugValuesID; - /// createJumpInstrTables - This pass creates jump-instruction tables. - ModulePass *createJumpInstrTablesPass(); - /// InterleavedAccess Pass - This pass identifies and matches interleaved /// memory accesses to target specific intrinsics. 
/// From 019d67f19721f54ad6be81bcc29285713ae23249 Mon Sep 17 00:00:00 2001 From: wangpc Date: Thu, 12 Oct 2023 17:02:13 +0800 Subject: [PATCH 187/720] [RISCV][NFC] Remove space --- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 54efe67f600a9..94de559b1e6e0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -169,7 +169,7 @@ class RISCVSImmOp : RISCVOp { let OperandType = "OPERAND_SIMM" # bitsNum; } -class RISCVSImmLeafOp : +class RISCVSImmLeafOp : RISCVSImmOp, ImmLeaf(Imm);">; def FenceArg : AsmOperandClass { From fd673e8c4e4fc5a892309fe201c8d238dd72c941 Mon Sep 17 00:00:00 2001 From: Stephen Chou Date: Sun, 15 Oct 2023 21:20:43 -0700 Subject: [PATCH 188/720] [MLIR][SCF] Removes incorrect assertion in loop unroller (#69028) In particular, `upperBoundUnrolledCst` may be larger than `ubCst` when: 1. the step size is greater than 1; 2. `ub - lb` is not evenly divisible by the step size; and 3. the loop's trip count is evenly divisible by the unroll factor. This is okay since the non-unit step size ensures that the unrolled loop maintains the same trip count as the original loop. Added a test case for this. Fixes #61832. 
Co-authored-by: Stephen Chou --- mlir/lib/Dialect/SCF/Utils/Utils.cpp | 1 - mlir/test/Dialect/SCF/loop-unroll.mlir | 30 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index 5360c493f8f8d..e85825595e3c1 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -391,7 +391,6 @@ LogicalResult mlir::loopUnrollByFactor( int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor); int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst; - assert(upperBoundUnrolledCst <= ubCst); int64_t stepUnrolledCst = stepCst * unrollFactor; // Create constant for 'upperBoundUnrolled' and set epilogue loop flag. diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir index c83e33d7fbc9c..e28efbb6ec2b9 100644 --- a/mlir/test/Dialect/SCF/loop-unroll.mlir +++ b/mlir/test/Dialect/SCF/loop-unroll.mlir @@ -186,6 +186,36 @@ func.func @static_loop_unroll_by_2(%arg0 : memref) { // UNROLL-BY-2-ANNOTATE: memref.store %{{.*}}, %[[MEM:.*0]][%{{.*}}] {unrolled_iteration = 0 : ui32} : memref // UNROLL-BY-2-ANNOTATE: memref.store %{{.*}}, %[[MEM]][%{{.*}}] {unrolled_iteration = 1 : ui32} : memref +// Test that no epilogue clean-up loop is generated because the trip count +// (taking into account the non-unit step size) is a multiple of the unroll +// factor. 
+func.func @static_loop_step_2_unroll_by_2(%arg0 : memref) { + %0 = arith.constant 7.0 : f32 + %lb = arith.constant 0 : index + %ub = arith.constant 19 : index + %step = arith.constant 2 : index + scf.for %i0 = %lb to %ub step %step { + memref.store %0, %arg0[%i0] : memref + } + return +} + +// UNROLL-BY-2-LABEL: func @static_loop_step_2_unroll_by_2 +// UNROLL-BY-2-SAME: %[[MEM:.*0]]: memref +// +// UNROLL-BY-2-DAG: %[[C0:.*]] = arith.constant 0 : index +// UNROLL-BY-2-DAG: %[[C2:.*]] = arith.constant 2 : index +// UNROLL-BY-2-DAG: %[[C19:.*]] = arith.constant 19 : index +// UNROLL-BY-2-DAG: %[[C4:.*]] = arith.constant 4 : index +// UNROLL-BY-2: scf.for %[[IV:.*]] = %[[C0]] to %[[C19]] step %[[C4]] { +// UNROLL-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = arith.constant 1 : index +// UNROLL-BY-2-NEXT: %[[V0:.*]] = arith.muli %[[C2]], %[[C1_IV]] : index +// UNROLL-BY-2-NEXT: %[[V1:.*]] = arith.addi %[[IV]], %[[V0]] : index +// UNROLL-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-BY-2-NEXT: } +// UNROLL-BY-2-NEXT: return + // Test that epilogue clean up loop is generated (trip count is not // a multiple of unroll factor). func.func @static_loop_unroll_by_3(%arg0 : memref) { From e3f533201c61beb49ffcf7c565ffe07763b7a616 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 15 Oct 2023 21:27:03 -0700 Subject: [PATCH 189/720] [RISCV][GISel] Don't setType on PtrReg in RISCVInstructionSelector::replacePtrWithInt. PtrReg is still a pointer. It's being passed to G_PTRTOINT as a pointer. 
--- llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 3a86dcbd86a0a..12d1d64212720 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -210,7 +210,6 @@ bool RISCVInstructionSelector::replacePtrWithInt(MachineOperand &Op, const LLT XLenLLT = LLT::scalar(STI.getXLen()); auto PtrToInt = MIB.buildPtrToInt(XLenLLT, PtrReg); MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(RISCV::GPRRegBankID)); - MRI.setType(PtrReg, XLenLLT); Op.setReg(PtrToInt.getReg(0)); return select(*PtrToInt); } From 58c9ef5a2da5c99bdb891c0e7894056c7d201e85 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 15 Oct 2023 21:35:14 -0700 Subject: [PATCH 190/720] [RISCV] Use f64 for LocVT for ilp32 when whole f64 is passed on the stack. NFC (#69118) This removes the special case from unpackF64OnRV32DSoftABI. We can use the default MemLoc handling. This also allows us to remove a isRegLoc() check from LowerCall. This is part of preparation for supporting FP arguments with GISel. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d7552317fd8bc..ed1f7b6c50a4d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16452,13 +16452,13 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these // cases. 
Register Reg = State.AllocateReg(ArgGPRs); - LocVT = MVT::i32; if (!Reg) { unsigned StackOffset = State.AllocateStack(8, Align(8)); State.addLoc( CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); return false; } + LocVT = MVT::i32; if (!State.AllocateReg(ArgGPRs)) State.AllocateStack(4, Align(4)); State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); @@ -16777,15 +16777,6 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - if (VA.isMemLoc()) { - // f64 is passed on the stack. - int FI = - MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true); - SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - return DAG.getLoad(MVT::f64, DL, Chain, FIN, - MachinePointerInfo::getFixedStack(MF, FI)); - } - assert(VA.isRegLoc() && "Expected register VA assignment"); Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); @@ -17298,9 +17289,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, ISD::ArgFlagsTy Flags = Outs[i].Flags; // Handle passing f64 on RV32D with a soft float ABI as a special case. - bool IsF64OnRV32DSoftABI = - VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; - if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { + if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { + assert(VA.isRegLoc() && "Expected register VA assignment"); SDValue SplitF64 = DAG.getNode( RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); SDValue Lo = SplitF64.getValue(0); @@ -17326,9 +17316,6 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, continue; } - // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way - // as any other MemLoc. - // Promote the value if needed. // For now, only handle fully promoted and indirect arguments. 
if (VA.getLocInfo() == CCValAssign::Indirect) { From 94d0a3c4a8b43759cb896bbbe8bd38e7e02eb70e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Mon, 16 Oct 2023 04:52:35 +0200 Subject: [PATCH 191/720] [clang][Interp][NFC] Add comments to Descriptor ctors I can't tell these apart every time I look at them. --- clang/lib/AST/Interp/Descriptor.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp index 4ecb7466998e7..3990282686fe3 100644 --- a/clang/lib/AST/Interp/Descriptor.cpp +++ b/clang/lib/AST/Interp/Descriptor.cpp @@ -221,6 +221,7 @@ static BlockMoveFn getMoveArrayPrim(PrimType Type) { COMPOSITE_TYPE_SWITCH(Type, return moveArrayTy, return nullptr); } +/// Primitives. Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD, bool IsConst, bool IsTemporary, bool IsMutable) : Source(D), ElemSize(primSize(Type)), Size(ElemSize), @@ -231,6 +232,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD, assert(Source && "Missing source"); } +/// Primitive arrays. Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD, size_t NumElems, bool IsConst, bool IsTemporary, bool IsMutable) @@ -243,6 +245,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD, assert(Source && "Missing source"); } +/// Primitive unknown-size arrays. Descriptor::Descriptor(const DeclTy &D, PrimType Type, bool IsTemporary, UnknownSize) : Source(D), ElemSize(primSize(Type)), Size(UnknownSizeMark), MDSize(0), @@ -252,6 +255,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, bool IsTemporary, assert(Source && "Missing source"); } +/// Arrays of composite elements. 
Descriptor::Descriptor(const DeclTy &D, Descriptor *Elem, MetadataSize MD, unsigned NumElems, bool IsConst, bool IsTemporary, bool IsMutable) @@ -264,6 +268,7 @@ Descriptor::Descriptor(const DeclTy &D, Descriptor *Elem, MetadataSize MD, assert(Source && "Missing source"); } +/// Unknown-size arrays of composite elements. Descriptor::Descriptor(const DeclTy &D, Descriptor *Elem, bool IsTemporary, UnknownSize) : Source(D), ElemSize(Elem->getAllocSize() + sizeof(InlineDescriptor)), @@ -274,6 +279,7 @@ Descriptor::Descriptor(const DeclTy &D, Descriptor *Elem, bool IsTemporary, assert(Source && "Missing source"); } +/// Composite records. Descriptor::Descriptor(const DeclTy &D, Record *R, MetadataSize MD, bool IsConst, bool IsTemporary, bool IsMutable) : Source(D), ElemSize(std::max(alignof(void *), R->getFullSize())), From 96e473a6be2e82e3fb4060805c7928c981111025 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Mon, 16 Oct 2023 07:41:18 +0200 Subject: [PATCH 192/720] [RFC][GlobalISel] Use Builders in MatchTable (#65955) The MatchTableExecutor did not use the MachineIRBuilder but instead created instructions ad-hoc. Making it use a Builder has the benefit that any observer added by a combine is now notified when instructions are created by MIR patterns. Another benefit is that it allows me to improve how constants are created in apply MIR patterns. `MachineIRBuilder::buildConstant` automatically handles splats for us, this means that we may change `addCImm` to use that and handle vector cases automatically. 
--- .../CodeGen/GlobalISel/GIMatchTableExecutor.h | 19 +++++---- .../GlobalISel/GIMatchTableExecutorImpl.h | 41 ++++++++++++------- .../GlobalISelCombinerEmitter/match-table.td | 4 +- llvm/test/TableGen/GlobalISelEmitter.td | 4 +- .../TableGen/GlobalISelCombinerEmitter.cpp | 6 +-- llvm/utils/TableGen/GlobalISelEmitter.cpp | 4 +- 6 files changed, 45 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h index 2b0733cf9353e..45da6d96aa3de 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h @@ -40,6 +40,7 @@ class APInt; class APFloat; class GISelKnownBits; class MachineInstr; +class MachineIRBuilder; class MachineInstrBuilder; class MachineFunction; class MachineOperand; @@ -555,15 +556,15 @@ class GIMatchTableExecutor { /// and false otherwise. template - bool executeMatchTable( - TgtExecutor &Exec, NewMIVector &OutMIs, MatcherState &State, - const ExecInfoTy - &ISelInfo, - const int64_t *MatchTable, const TargetInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, - CodeGenCoverage *CoverageInfo, - GISelChangeObserver *Observer = nullptr) const; + bool executeMatchTable(TgtExecutor &Exec, MatcherState &State, + const ExecInfoTy &ExecInfo, + MachineIRBuilder &Builder, const int64_t *MatchTable, + const TargetInstrInfo &TII, MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI, + const PredicateBitset &AvailableFeatures, + CodeGenCoverage *CoverageInfo) const; virtual const int64_t *getMatchTable() const { llvm_unreachable("Should have been overridden by tablegen if used"); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h index 
883c1ca0fe350..6f0f9a6a46c7c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" @@ -42,17 +43,20 @@ namespace llvm { template bool GIMatchTableExecutor::executeMatchTable( - TgtExecutor &Exec, NewMIVector &OutMIs, MatcherState &State, + TgtExecutor &Exec, MatcherState &State, const ExecInfoTy &ExecInfo, - const int64_t *MatchTable, const TargetInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, - CodeGenCoverage *CoverageInfo, GISelChangeObserver *Observer) const { + MachineIRBuilder &Builder, const int64_t *MatchTable, + const TargetInstrInfo &TII, MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI, + const PredicateBitset &AvailableFeatures, + CodeGenCoverage *CoverageInfo) const { uint64_t CurrentIdx = 0; SmallVector OnFailResumeAt; + NewMIVector OutMIs; + GISelChangeObserver *Observer = Builder.getObserver(); // Bypass the flag check on the instruction, and only look at the MCInstrDesc. bool NoFPException = !State.MIs[0]->getDesc().mayRaiseFPException(); @@ -71,14 +75,18 @@ bool GIMatchTableExecutor::executeMatchTable( return RejectAndResume; }; - auto propagateFlags = [=](NewMIVector &OutMIs) { + const auto propagateFlags = [&]() { for (auto MIB : OutMIs) { // Set the NoFPExcept flag when no original matched instruction could // raise an FP exception, but the new instruction potentially might. 
uint16_t MIBFlags = Flags; if (NoFPException && MIB->mayRaiseFPException()) MIBFlags |= MachineInstr::NoFPExcept; + if (Observer) + Observer->changingInstr(*MIB); MIB.setMIFlags(MIBFlags); + if (Observer) + Observer->changedInstr(*MIB); } return true; @@ -898,9 +906,13 @@ bool GIMatchTableExecutor::executeMatchTable( if (NewInsnID >= OutMIs.size()) OutMIs.resize(NewInsnID + 1); - OutMIs[NewInsnID] = MachineInstrBuilder(*State.MIs[OldInsnID]->getMF(), - State.MIs[OldInsnID]); + MachineInstr *OldMI = State.MIs[OldInsnID]; + if (Observer) + Observer->changingInstr(*OldMI); + OutMIs[NewInsnID] = MachineInstrBuilder(*OldMI->getMF(), OldMI); OutMIs[NewInsnID]->setDesc(TII.get(NewOpcode)); + if (Observer) + Observer->changedInstr(*OldMI); DEBUG_WITH_TYPE(TgtExecutor::getName(), dbgs() << CurrentIdx << ": GIR_MutateOpcode(OutMIs[" << NewInsnID << "], MIs[" << OldInsnID << "], " @@ -914,8 +926,7 @@ bool GIMatchTableExecutor::executeMatchTable( if (NewInsnID >= OutMIs.size()) OutMIs.resize(NewInsnID + 1); - OutMIs[NewInsnID] = BuildMI(*State.MIs[0]->getParent(), State.MIs[0], - MIMetadata(*State.MIs[0]), TII.get(Opcode)); + OutMIs[NewInsnID] = Builder.buildInstr(Opcode); DEBUG_WITH_TYPE(TgtExecutor::getName(), dbgs() << CurrentIdx << ": GIR_BuildMI(OutMIs[" << NewInsnID << "], " << Opcode << ")\n"); @@ -1239,6 +1250,10 @@ bool GIMatchTableExecutor::executeMatchTable( DEBUG_WITH_TYPE(TgtExecutor::getName(), dbgs() << CurrentIdx << ": GIR_EraseFromParent(MIs[" << InsnID << "])\n"); + // If we're erasing the insertion point, ensure we don't leave a dangling + // pointer in the builder. 
+ if (Builder.getInsertPt() == MI) + Builder.setInsertPt(*MI->getParent(), ++MI->getIterator()); if (Observer) Observer->erasingInstr(*MI); MI->eraseFromParent(); @@ -1309,11 +1324,7 @@ bool GIMatchTableExecutor::executeMatchTable( case GIR_Done: DEBUG_WITH_TYPE(TgtExecutor::getName(), dbgs() << CurrentIdx << ": GIR_Done\n"); - if (Observer) { - for (MachineInstr *MI : OutMIs) - Observer->createdInstr(*MI); - } - propagateFlags(OutMIs); + propagateFlags(); return true; default: llvm_unreachable("Unexpected command"); diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td index b810c519d2ac3..f51a18c4d3e73 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td @@ -93,12 +93,12 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: bool GenMyCombiner::tryCombineAll(MachineInstr &I) const { // CHECK-NEXT: const TargetSubtargetInfo &ST = MF.getSubtarget(); // CHECK-NEXT: const PredicateBitset AvailableFeatures = getAvailableFeatures(); -// CHECK-NEXT: NewMIVector OutMIs; +// CHECK-NEXT: B.setInstrAndDebugLoc(I); // CHECK-NEXT: State.MIs.clear(); // CHECK-NEXT: State.MIs.push_back(&I); // CHECK-NEXT: MatchInfos = MatchInfosTy(); // CHECK-EMPTY: -// CHECK-NEXT: if (executeMatchTable(*this, OutMIs, State, ExecInfo, getMatchTable(), *ST.getInstrInfo(), MRI, *MRI.getTargetRegisterInfo(), *ST.getRegBankInfo(), AvailableFeatures, /*CoverageInfo*/ nullptr, &Observer)) +// CHECK-NEXT: if (executeMatchTable(*this, State, ExecInfo, B, getMatchTable(), *ST.getInstrInfo(), MRI, *MRI.getTargetRegisterInfo(), *ST.getRegBankInfo(), AvailableFeatures, /*CoverageInfo*/ nullptr)) // CHECK-NEXT: return true; // CHECK-NEXT: } // CHECK-EMPTY: diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td index 7cca2d52e4062..b7a81894f6442 100644 --- a/llvm/test/TableGen/GlobalISelEmitter.td +++ 
b/llvm/test/TableGen/GlobalISelEmitter.td @@ -216,11 +216,11 @@ def HasC : Predicate<"Subtarget->hasC()"> { let RecomputePerFunction = 1; } // CHECK: bool MyTargetInstructionSelector::selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const { // CHECK-NEXT: const PredicateBitset AvailableFeatures = getAvailableFeatures(); -// CHECK-NEXT: NewMIVector OutMIs; +// CHECK-NEXT: MachineIRBuilder B(I); // CHECK-NEXT: State.MIs.clear(); // CHECK-NEXT: State.MIs.push_back(&I); -// CHECK: if (executeMatchTable(*this, OutMIs, State, ExecInfo, getMatchTable(), TII, MF->getRegInfo(), TRI, RBI, AvailableFeatures, &CoverageInfo)) { +// CHECK: if (executeMatchTable(*this, State, ExecInfo, B, getMatchTable(), TII, MF->getRegInfo(), TRI, RBI, AvailableFeatures, &CoverageInfo)) { // CHECK-NEXT: return true; // CHECK-NEXT: } diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp index b28915148ee51..809415aeff153 100644 --- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp @@ -3465,15 +3465,15 @@ void GICombinerEmitter::emitAdditionalImpl(raw_ostream &OS) { << " const TargetSubtargetInfo &ST = MF.getSubtarget();\n" << " const PredicateBitset AvailableFeatures = " "getAvailableFeatures();\n" - << " NewMIVector OutMIs;\n" + << " B.setInstrAndDebugLoc(I);\n" << " State.MIs.clear();\n" << " State.MIs.push_back(&I);\n" << " " << MatchDataInfo::StructName << " = " << MatchDataInfo::StructTypeName << "();\n\n" - << " if (executeMatchTable(*this, OutMIs, State, ExecInfo" + << " if (executeMatchTable(*this, State, ExecInfo, B" << ", getMatchTable(), *ST.getInstrInfo(), MRI, " "*MRI.getTargetRegisterInfo(), *ST.getRegBankInfo(), AvailableFeatures" - << ", /*CoverageInfo*/ nullptr, &Observer)) {\n" + << ", /*CoverageInfo*/ nullptr)) {\n" << " return true;\n" << " }\n\n" << " return false;\n" diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp 
b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 2ea48904466af..8d9ded1b2ac5e 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -2267,10 +2267,10 @@ void GlobalISelEmitter::emitAdditionalImpl(raw_ostream &OS) { "&CoverageInfo) const {\n" << " const PredicateBitset AvailableFeatures = " "getAvailableFeatures();\n" - << " NewMIVector OutMIs;\n" + << " MachineIRBuilder B(I);\n" << " State.MIs.clear();\n" << " State.MIs.push_back(&I);\n\n" - << " if (executeMatchTable(*this, OutMIs, State, ExecInfo" + << " if (executeMatchTable(*this, State, ExecInfo, B" << ", getMatchTable(), TII, MF->getRegInfo(), TRI, RBI, AvailableFeatures" << ", &CoverageInfo)) {\n" << " return true;\n" From cd88466dafdb1137196eaa04527c3aa4c742328f Mon Sep 17 00:00:00 2001 From: pvanhout Date: Mon, 16 Oct 2023 08:02:10 +0200 Subject: [PATCH 193/720] [TableGen] Fix GlobalISelEmitterHwModes.td after 96e473a --- llvm/test/TableGen/GlobalISelEmitterHwModes.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/TableGen/GlobalISelEmitterHwModes.td b/llvm/test/TableGen/GlobalISelEmitterHwModes.td index 678acb4cd0c4d..b185feaf009fb 100644 --- a/llvm/test/TableGen/GlobalISelEmitterHwModes.td +++ b/llvm/test/TableGen/GlobalISelEmitterHwModes.td @@ -113,11 +113,11 @@ class I Pat> // CHECK: bool MyTargetInstructionSelector::selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const { // CHECK-NEXT: const PredicateBitset AvailableFeatures = getAvailableFeatures(); -// CHECK-NEXT: NewMIVector OutMIs; +// CHECK-NEXT: MachineIRBuilder B(I); // CHECK-NEXT: State.MIs.clear(); // CHECK-NEXT: State.MIs.push_back(&I); -// CHECK: if (executeMatchTable(*this, OutMIs, State, ExecInfo, getMatchTable(), TII, MF->getRegInfo(), TRI, RBI, AvailableFeatures, &CoverageInfo)) { +// CHECK: if (executeMatchTable(*this, State, ExecInfo, B, getMatchTable(), TII, MF->getRegInfo(), TRI, RBI, AvailableFeatures, &CoverageInfo)) { // CHECK-NEXT: 
return true; // CHECK-NEXT: } From 544d91280c26fd5f7acd70eac4d667863562f4cc Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Mon, 16 Oct 2023 08:21:48 +0200 Subject: [PATCH 194/720] [AMDGPU] Remove Code Object V3 (#67118) V3 has been deprecated for a while as well, so it can safely be removed like V2 was removed. - [Clang] Set minimum code object version to 4 - [lld] Fix tests using code object v3 - Remove code object V3 from the AMDGPU backend, and delete or port v3 tests to v4. - Update docs to make it clear V3 can no longer be emitted. --- clang/include/clang/Basic/TargetOptions.h | 2 +- clang/include/clang/Driver/Options.td | 4 +- clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 +- .../CodeGenCUDA/amdgpu-code-object-version.cu | 4 - clang/test/Driver/hip-code-object-version.hip | 22 +- clang/test/Driver/hip-device-libs.hip | 6 - lld/test/ELF/amdgpu-abi-version.s | 8 - llvm/docs/AMDGPUUsage.rst | 9 +- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 - .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 85 ++- .../Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 21 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 5 - .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 1 - llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 - .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 21 - llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 9 +- ...licit-kernarg-backend-usage-global-isel.ll | 162 ------ .../attr-amdgpu-flat-work-group-size-v3.ll | 148 ----- .../AMDGPU/directive-amdgcn-target-v3.ll | 168 ------ ...-v3.ll => hsa-metadata-enqueue-kernel-.ll} | 4 +- .../hsa-metadata-from-llvm-ctor-dtor-list.ll | 2 +- ...3.ll => hsa-metadata-from-llvm-ir-full.ll} | 4 +- ...s-v3.ll => hsa-metadata-hidden-args-v4.ll} | 4 +- ... => hsa-metadata-hostcall-present-asan.ll} | 4 +- ...call-v3.ll => hsa-metadata-hostcall-v4.ll} | 2 +- ...ta-images-v3.ll => hsa-metadata-images.ll} | 4 +- ... => hsa-metadata-invalid-ocl-version-1.ll} | 4 +- ... 
=> hsa-metadata-invalid-ocl-version-3.ll} | 4 +- ...3.ll => hsa-metadata-kernel-code-props.ll} | 4 +- .../AMDGPU/implicit-kernarg-backend-usage.ll | 157 ------ llvm/test/CodeGen/AMDGPU/kernarg-size.ll | 9 - .../CodeGen/AMDGPU/stack-realign-kernel.ll | 2 +- llvm/test/CodeGen/AMDGPU/trap-abis.ll | 517 ++++++------------ .../AMDGPU/{hsa-diag-v3.s => hsa-diag-v4.s} | 18 +- llvm/test/MC/AMDGPU/hsa-gfx10-v3.s | 226 -------- llvm/test/MC/AMDGPU/hsa-gfx11-v3.s | 213 -------- llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s | 184 ------- llvm/test/MC/AMDGPU/hsa-gfx940-v3.s | 178 ------ llvm/test/MC/AMDGPU/hsa-v3.s | 304 ---------- llvm/test/MC/AMDGPU/user-sgpr-count-diag.s | 2 +- llvm/test/MC/AMDGPU/user-sgpr-count.s | 6 +- 41 files changed, 257 insertions(+), 2280 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll rename llvm/test/CodeGen/AMDGPU/{hsa-metadata-enqueue-kernel-v3.ll => hsa-metadata-enqueue-kernel-.ll} (98%) rename llvm/test/CodeGen/AMDGPU/{hsa-metadata-from-llvm-ir-full-v3.ll => hsa-metadata-from-llvm-ir-full.ll} (99%) rename llvm/test/CodeGen/AMDGPU/{hsa-metadata-hidden-args-v3.ll => hsa-metadata-hidden-args-v4.ll} (99%) rename llvm/test/CodeGen/AMDGPU/{hsa-metadata-hostcall-present-v3-asan.ll => hsa-metadata-hostcall-present-asan.ll} (96%) rename llvm/test/CodeGen/AMDGPU/{hsa-metadata-hostcall-v3.ll => hsa-metadata-hostcall-v4.ll} (99%) rename llvm/test/CodeGen/AMDGPU/{hsa-metadata-images-v3.ll => hsa-metadata-images.ll} (98%) rename llvm/test/CodeGen/AMDGPU/{hsa-metadata-invalid-ocl-version-1-v3.ll => hsa-metadata-invalid-ocl-version-1.ll} (80%) rename llvm/test/CodeGen/AMDGPU/{hsa-metadata-invalid-ocl-version-3-v3.ll => hsa-metadata-invalid-ocl-version-3.ll} (81%) rename llvm/test/CodeGen/AMDGPU/{hsa-metadata-kernel-code-props-v3.ll => hsa-metadata-kernel-code-props.ll} (99%) rename llvm/test/MC/AMDGPU/{hsa-diag-v3.s => hsa-diag-v4.s} (94%) delete 
mode 100644 llvm/test/MC/AMDGPU/hsa-gfx10-v3.s delete mode 100644 llvm/test/MC/AMDGPU/hsa-gfx11-v3.s delete mode 100644 llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s delete mode 100644 llvm/test/MC/AMDGPU/hsa-gfx940-v3.s delete mode 100644 llvm/test/MC/AMDGPU/hsa-v3.s diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h index 8bb03249b7f83..ba3acd0295871 100644 --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -83,7 +83,7 @@ class TargetOptions { enum CodeObjectVersionKind { COV_None, COV_2 = 200, // Unsupported. - COV_3 = 300, + COV_3 = 300, // Unsupported. COV_4 = 400, COV_5 = 500, }; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 640044622fc09..a89d6b6579f11 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4682,9 +4682,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, HelpText<"Specify code object ABI version. Defaults to 4. 
(AMDGPU only)">, Visibility<[ClangOption, CC1Option]>, - Values<"none,3,4,5">, + Values<"none,4,5">, NormalizedValuesScope<"TargetOptions">, - NormalizedValues<["COV_None", "COV_3", "COV_4", "COV_5"]>, + NormalizedValues<["COV_None", "COV_4", "COV_5"]>, MarshallingInfoEnum, "COV_4">; defm cumode : SimpleMFlag<"cumode", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 25fd940584624..f104ec5a881cb 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2338,7 +2338,7 @@ getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) { void tools::checkAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - const unsigned MinCodeObjVer = 3; + const unsigned MinCodeObjVer = 4; const unsigned MaxCodeObjVer = 5; if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) { diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index 0ddd63faf46f2..ff5deaf9ab850 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -3,9 +3,6 @@ // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -o - %s | FileCheck %s -check-prefix=V4 -// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -mcode-object-version=3 -o - %s | FileCheck -check-prefix=V3 %s - // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s @@ -18,7 +15,6 @@ // RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4.1 -o - %s 2>&1| FileCheck %s -check-prefix=INV -// V3: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 300} // V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400} // V5: !{{.*}} = !{i32 1, 
!"amdgpu_code_object_version", i32 500} // NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", diff --git a/clang/test/Driver/hip-code-object-version.hip b/clang/test/Driver/hip-code-object-version.hip index 33559b6576e7d..af5f9a3da21df 100644 --- a/clang/test/Driver/hip-code-object-version.hip +++ b/clang/test/Driver/hip-code-object-version.hip @@ -1,20 +1,5 @@ // REQUIRES: amdgpu-registered-target -// Check bundle ID for code object v3. - -// RUN: not %clang -### --target=x86_64-linux-gnu \ -// RUN: -mcode-object-version=3 \ -// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \ -// RUN: %s 2>&1 | FileCheck -check-prefix=V3 %s - -// RUN: not %clang -### --target=x86_64-linux-gnu \ -// RUN: -mcode-object-version=4 -mcode-object-version=3 \ -// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \ -// RUN: %s 2>&1 | FileCheck -check-prefix=V3 %s - -// V3: "-mcode-object-version=3" -// V3: "-mllvm" "--amdhsa-code-object-version=3" -// V3: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx906" // Check bundle ID for code object version 4. @@ -62,6 +47,13 @@ // INVALID_2: error: invalid integral value '2' in '-mcode-object-version=2' // INVALID_2-NOT: error: invalid integral value +// RUN: not %clang -### --target=x86_64-linux-gnu \ +// RUN: -mcode-object-version=3 \ +// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=INVALID_3 %s +// INVALID_3: error: invalid integral value '3' in '-mcode-object-version=3' +// INVALID_3-NOT: error: invalid integral value + // Check LLVM code object version option --amdhsa-code-object-version // is passed to -cc1 and -cc1as, and -mcode-object-version is passed // to -cc1 but not -cc1as. 
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 71d9554da696b..6ac5778721ba5 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -168,12 +168,6 @@ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 -// Test -mcode-object-version=3 -// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ -// RUN: -mcode-object-version=3 \ -// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4 - // Test -mcode-object-version=4 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: -mcode-object-version=4 \ diff --git a/lld/test/ELF/amdgpu-abi-version.s b/lld/test/ELF/amdgpu-abi-version.s index 455a52aec9210..72b67fdaeb1a1 100644 --- a/lld/test/ELF/amdgpu-abi-version.s +++ b/lld/test/ELF/amdgpu-abi-version.s @@ -1,11 +1,3 @@ -# REQUIRES: amdgpu -# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj %s -o %t.o -# RUN: ld.lld -shared %t.o -o %t.so -# RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=COV3 %s - -# COV3: OS/ABI: AMDGPU_HSA (0x40) -# COV3: ABIVersion: 1 - # RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -filetype=obj %s -o %t.o # RUN: ld.lld -shared %t.o -o %t.so # RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=COV4 %s diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 8022816d7e616..ed9581ccc93df 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1409,12 +1409,10 @@ The AMDGPU backend uses the following ELF header: object conforms: * ``ELFABIVERSION_AMDGPU_HSA_V2`` is used to specify the version of AMD HSA - runtime ABI for code object V2. Specify using the Clang option - ``-mcode-object-version=2``. + runtime ABI for code object V2. 
Can no longer be emitted by this version of LLVM. * ``ELFABIVERSION_AMDGPU_HSA_V3`` is used to specify the version of AMD HSA - runtime ABI for code object V3. Specify using the Clang option - ``-mcode-object-version=3``. + runtime ABI for code object V3. Can no longer be emitted by this version of LLVM. * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA runtime ABI for code object V4. Specify using the Clang option @@ -3402,8 +3400,7 @@ Code Object V3 Metadata +++++++++++++++++++++++ .. warning:: - Code object V3 is not the default code object version emitted by this version - of LLVM. + Code object V3 generation is no longer supported by this version of LLVM. Code object V3 and above metadata is specified by the ``NT_AMDGPU_METADATA`` note record (see :ref:`amdgpu-note-records-v3-onwards`). diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index aadc4a68ea132..8d0ef67a615df 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -341,9 +341,6 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) { if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { switch (CodeObjectVersion) { - case AMDGPU::AMDHSA_COV3: - HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3()); - break; case AMDGPU::AMDHSA_COV4: HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4()); break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 5060cd3aec581..b51a876750b58 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -49,14 +49,14 @@ namespace AMDGPU { namespace HSAMD { //===----------------------------------------------------------------------===// -// HSAMetadataStreamerV3 +// HSAMetadataStreamerV4 //===----------------------------------------------------------------------===// -void 
MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const { +void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; } -void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const { +void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata Parser Test: "; msgpack::Document FromHSAMetadataString; @@ -78,7 +78,7 @@ void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const { } std::optional -MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const { +MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const { return StringSwitch>(AccQual) .Case("read_only", StringRef("read_only")) .Case("write_only", StringRef("write_only")) @@ -86,7 +86,7 @@ MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const { .Default(std::nullopt); } -std::optional MetadataStreamerMsgPackV3::getAddressSpaceQualifier( +std::optional MetadataStreamerMsgPackV4::getAddressSpaceQualifier( unsigned AddressSpace) const { switch (AddressSpace) { case AMDGPUAS::PRIVATE_ADDRESS: @@ -107,7 +107,7 @@ std::optional MetadataStreamerMsgPackV3::getAddressSpaceQualifier( } StringRef -MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual, +MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const { if (TypeQual.contains("pipe")) return "pipe"; @@ -134,7 +134,7 @@ MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual, : "by_value"); } -std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty, +std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty, bool Signed) const { switch (Ty->getTypeID()) { case Type::IntegerTyID: { @@ -173,7 +173,7 @@ std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty, } msgpack::ArrayDocNode -MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const { 
+MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const { auto Dims = HSAMetadataDoc->getArrayNode(); if (Node->getNumOperands() != 3) return Dims; @@ -184,14 +184,20 @@ MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const { return Dims; } -void MetadataStreamerMsgPackV3::emitVersion() { +void MetadataStreamerMsgPackV4::emitVersion() { auto Version = HSAMetadataDoc->getArrayNode(); - Version.push_back(Version.getDocument()->getNode(VersionMajorV3)); - Version.push_back(Version.getDocument()->getNode(VersionMinorV3)); + Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); + Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); getRootMetadata("amdhsa.version") = Version; } -void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) { +void MetadataStreamerMsgPackV4::emitTargetID( + const IsaInfo::AMDGPUTargetID &TargetID) { + getRootMetadata("amdhsa.target") = + HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); +} + +void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) { auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); if (!Node) return; @@ -204,7 +210,7 @@ void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) { getRootMetadata("amdhsa.printf") = Printf; } -void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func, +void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern) { // TODO: What about other languages? 
auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); @@ -223,7 +229,7 @@ void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func, Kern[".language_version"] = LanguageVersion; } -void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func, +void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern) { if (auto Node = Func.getMetadata("reqd_work_group_size")) @@ -248,7 +254,7 @@ void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func, Kern[".kind"] = Kern.getDocument()->getNode("fini"); } -void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF, +void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern) { auto &Func = MF.getFunction(); unsigned Offset = 0; @@ -261,7 +267,7 @@ void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF, Kern[".args"] = Args; } -void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg, +void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg, unsigned &Offset, msgpack::ArrayDocNode Args) { auto Func = Arg.getParent(); @@ -326,7 +332,7 @@ void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg, AccQual, TypeQual); } -void MetadataStreamerMsgPackV3::emitKernelArg( +void MetadataStreamerMsgPackV4::emitKernelArg( const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName, StringRef BaseTypeName, @@ -375,7 +381,7 @@ void MetadataStreamerMsgPackV3::emitKernelArg( Args.push_back(Arg); } -void MetadataStreamerMsgPackV3::emitHiddenKernelArgs( +void MetadataStreamerMsgPackV4::emitHiddenKernelArgs( const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { auto &Func = MF.getFunction(); const GCNSubtarget &ST = MF.getSubtarget(); @@ -448,9 +454,10 @@ void MetadataStreamerMsgPackV3::emitHiddenKernelArgs( } } -msgpack::MapDocNode 
MetadataStreamerMsgPackV3::getHSAKernelProps( - const MachineFunction &MF, const SIProgramInfo &ProgramInfo, - unsigned CodeObjectVersion) const { +msgpack::MapDocNode +MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo, + unsigned CodeObjectVersion) const { const GCNSubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); const Function &F = MF.getFunction(); @@ -495,18 +502,19 @@ msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps( return Kern; } -bool MetadataStreamerMsgPackV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { +bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) { return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); } -void MetadataStreamerMsgPackV3::begin(const Module &Mod, +void MetadataStreamerMsgPackV4::begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) { emitVersion(); + emitTargetID(TargetID); emitPrintf(Mod); getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); } -void MetadataStreamerMsgPackV3::end() { +void MetadataStreamerMsgPackV4::end() { std::string HSAMetadataString; raw_string_ostream StrOS(HSAMetadataString); HSAMetadataDoc->toYAML(StrOS); @@ -517,7 +525,7 @@ void MetadataStreamerMsgPackV3::end() { verify(StrOS.str()); } -void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF, +void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { auto &Func = MF.getFunction(); if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL && @@ -542,31 +550,6 @@ void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF, Kernels.push_back(Kern); } -//===----------------------------------------------------------------------===// -// HSAMetadataStreamerV4 -//===----------------------------------------------------------------------===// - -void MetadataStreamerMsgPackV4::emitVersion() { - auto Version = 
HSAMetadataDoc->getArrayNode(); - Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); - Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); - getRootMetadata("amdhsa.version") = Version; -} - -void MetadataStreamerMsgPackV4::emitTargetID( - const IsaInfo::AMDGPUTargetID &TargetID) { - getRootMetadata("amdhsa.target") = - HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); -} - -void MetadataStreamerMsgPackV4::begin(const Module &Mod, - const IsaInfo::AMDGPUTargetID &TargetID) { - emitVersion(); - emitTargetID(TargetID); - emitPrintf(Mod); - getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); -} - //===----------------------------------------------------------------------===// // HSAMetadataStreamerV5 //===----------------------------------------------------------------------===// @@ -680,7 +663,7 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern) { - MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern); + MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern); if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool()) Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index d2b3b8917ce0f..18a7b5d7a9633 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -62,7 +62,7 @@ class MetadataStreamer { msgpack::MapDocNode Kern) = 0; }; -class MetadataStreamerMsgPackV3 : public MetadataStreamer { +class MetadataStreamerMsgPackV4 : public MetadataStreamer { protected: std::unique_ptr HSAMetadataDoc = std::make_unique(); @@ -89,6 +89,8 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer { void emitVersion() override; + void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID); + void emitPrintf(const 
Module &Mod); void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern); @@ -120,8 +122,8 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer { } public: - MetadataStreamerMsgPackV3() = default; - ~MetadataStreamerMsgPackV3() = default; + MetadataStreamerMsgPackV4() = default; + ~MetadataStreamerMsgPackV4() = default; bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; @@ -134,19 +136,6 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer { const SIProgramInfo &ProgramInfo) override; }; -class MetadataStreamerMsgPackV4 : public MetadataStreamerMsgPackV3 { -protected: - void emitVersion() override; - void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID); - -public: - MetadataStreamerMsgPackV4() = default; - ~MetadataStreamerMsgPackV4() = default; - - void begin(const Module &Mod, - const IsaInfo::AMDGPUTargetID &TargetID) override; -}; - class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 { protected: void emitVersion() override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 02cb77f6ecaca..d6717c998bec8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -6489,11 +6489,6 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) return legalizeTrapEndpgm(MI, MRI, B); - const Module *M = B.getMF().getFunction().getParent(); - unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M); - if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3) - return legalizeTrapHsaQueuePtr(MI, MRI, B); - return ST.supportsGetDoorbellID() ? 
legalizeTrapHsa(MI, MRI, B) : legalizeTrapHsaQueuePtr(MI, MRI, B); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 6b8c03c1620d2..42af09e27e471 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -424,7 +424,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( switch (CodeObjectVersion) { default: break; - case AMDGPU::AMDHSA_COV3: case AMDGPU::AMDHSA_COV4: case AMDGPU::AMDHSA_COV5: if (getTargetID()->isXnackSupported()) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 33f65ab786584..cd849560feac2 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5990,11 +5990,6 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) return lowerTrapEndpgm(Op, DAG); - const Module *M = DAG.getMachineFunction().getFunction().getParent(); - unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M); - if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3) - return lowerTrapHsaQueuePtr(Op, DAG); - return Subtarget->supportsGetDoorbellID() ? 
lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index d123b384a27d4..5fff19eada75d 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -128,8 +128,6 @@ std::optional getHsaAbiVersion(const MCSubtargetInfo *STI) { return std::nullopt; switch (AmdhsaCodeObjectVersion) { - case 3: - return ELF::ELFABIVERSION_AMDGPU_HSA_V3; case 4: return ELF::ELFABIVERSION_AMDGPU_HSA_V4; case 5: @@ -140,12 +138,6 @@ std::optional getHsaAbiVersion(const MCSubtargetInfo *STI) { } } -bool isHsaAbiVersion3(const MCSubtargetInfo *STI) { - if (std::optional HsaAbiVer = getHsaAbiVersion(STI)) - return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3; - return false; -} - bool isHsaAbiVersion4(const MCSubtargetInfo *STI) { if (std::optional HsaAbiVer = getHsaAbiVersion(STI)) return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4; @@ -174,7 +166,6 @@ unsigned getCodeObjectVersion(const Module &M) { unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV3: case AMDHSA_COV4: return 48; case AMDHSA_COV5: @@ -188,7 +179,6 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { // central TD file. 
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV3: case AMDHSA_COV4: return 24; case AMDHSA_COV5: @@ -199,7 +189,6 @@ unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV3: case AMDHSA_COV4: return 32; case AMDHSA_COV5: @@ -210,7 +199,6 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV3: case AMDHSA_COV4: return 40; case AMDHSA_COV5: @@ -774,15 +762,6 @@ std::string AMDGPUTargetID::toString() const { std::string Features; if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { switch (CodeObjectVersion) { - case AMDGPU::AMDHSA_COV3: - // xnack. - if (isXnackOnOrAny()) - Features += "+xnack"; - // In code object v2 and v3, "sramecc" feature was spelled with a - // hyphen ("sram-ecc"). - if (isSramEccOnOrAny()) - Features += "+sram-ecc"; - break; case AMDGPU::AMDHSA_COV4: case AMDGPU::AMDHSA_COV5: // sramecc. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index bb2964f592f66..1e0994d0862cf 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -42,19 +42,12 @@ namespace AMDGPU { struct IsaVersion; -enum { - AMDHSA_COV3 = 3, - AMDHSA_COV4 = 4, - AMDHSA_COV5 = 5 -}; +enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 }; /// \returns True if \p STI is AMDHSA. bool isHsaAbi(const MCSubtargetInfo &STI); /// \returns HSA OS ABI Version identification. std::optional getHsaAbiVersion(const MCSubtargetInfo *STI); -/// \returns True if HSA OS ABI Version identification is 3, -/// false otherwise. 
-bool isHsaAbiVersion3(const MCSubtargetInfo *STI); /// \returns True if HSA OS ABI Version identification is 4, /// false otherwise. bool isHsaAbiVersion4(const MCSubtargetInfo *STI); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll index c25ecafa1f7c0..4bdbe6604782a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll @@ -1,38 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) { -; GFX8V3-LABEL: addrspacecast: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; GFX8V3-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40 -; GFX8V3-NEXT: v_mov_b32_e32 v2, 1 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_mov_b32 s4, s0 -; GFX8V3-NEXT: s_mov_b32 s5, s3 -; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1 -; GFX8V3-NEXT: s_cselect_b64 s[4:5], 
s[4:5], 0 -; GFX8V3-NEXT: s_mov_b32 s6, s1 -; GFX8V3-NEXT: s_mov_b32 s7, s2 -; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1 -; GFX8V3-NEXT: v_mov_b32_e32 v0, s4 -; GFX8V3-NEXT: s_cselect_b64 s[0:1], s[6:7], 0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s5 -; GFX8V3-NEXT: flat_store_dword v[0:1], v2 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v2, 2 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s1 -; GFX8V3-NEXT: flat_store_dword v[0:1], v2 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: addrspacecast: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -82,30 +55,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: addrspacecast: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base -; GFX9V3-NEXT: s_mov_b64 s[4:5], src_shared_base -; GFX9V3-NEXT: v_mov_b32_e32 v2, 1 -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_mov_b32 s2, s0 -; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1 -; GFX9V3-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 -; GFX9V3-NEXT: s_mov_b32 s4, s1 -; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1 -; GFX9V3-NEXT: v_mov_b32_e32 v0, s2 -; GFX9V3-NEXT: s_cselect_b64 s[0:1], s[4:5], 0 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s3 -; GFX9V3-NEXT: flat_store_dword v[0:1], v2 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX9V3-NEXT: v_mov_b32_e32 v2, 2 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s1 -; GFX9V3-NEXT: flat_store_dword v[0:1], v2 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: addrspacecast: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -161,19 +110,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr } define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_is_shared: -; GFX8V3: ; %bb.0: -; 
GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x40 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V3-NEXT: s_cselect_b32 s0, 1, 0 -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: flat_store_dword v[0:1], v0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_is_shared: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -200,18 +136,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_is_shared: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9V3-NEXT: s_mov_b64 s[2:3], src_shared_base -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_cmp_eq_u32 s1, s3 -; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0 -; GFX9V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX9V3-NEXT: global_store_dword v[0:1], v0, off -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_is_shared: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -242,19 +166,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { } define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_is_private: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x44 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V3-NEXT: s_cselect_b32 s0, 1, 0 -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: flat_store_dword v[0:1], v0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_is_private: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -281,18 +192,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; 
GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_is_private: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_cmp_eq_u32 s1, s3 -; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0 -; GFX9V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX9V3-NEXT: global_store_dword v[0:1], v0, off -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_is_private: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -323,11 +222,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { } define amdgpu_kernel void @llvm_trap() { -; GFX8V3-LABEL: llvm_trap: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; GFX8V3-NEXT: s_trap 2 -; ; GFX8V4-LABEL: llvm_trap: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_mov_b64 s[0:1], s[4:5] @@ -339,11 +233,6 @@ define amdgpu_kernel void @llvm_trap() { ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0) ; GFX8V5-NEXT: s_trap 2 ; -; GFX9V3-LABEL: llvm_trap: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; GFX9V3-NEXT: s_trap 2 -; ; GFX9V4-LABEL: llvm_trap: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_trap 2 @@ -356,10 +245,6 @@ define amdgpu_kernel void @llvm_trap() { } define amdgpu_kernel void @llvm_debugtrap() { -; GFX8V3-LABEL: llvm_debugtrap: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_trap 3 -; ; GFX8V4-LABEL: llvm_debugtrap: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_trap 3 @@ -368,10 +253,6 @@ define amdgpu_kernel void @llvm_debugtrap() { ; GFX8V5: ; %bb.0: ; GFX8V5-NEXT: s_trap 3 ; -; GFX9V3-LABEL: llvm_debugtrap: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_trap 3 -; ; GFX9V4-LABEL: llvm_debugtrap: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_trap 3 @@ -384,32 +265,6 @@ define amdgpu_kernel void @llvm_debugtrap() { } define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_queue_ptr: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: v_mov_b32_e32 v0, s6 -; GFX8V3-NEXT: 
v_mov_b32_e32 v1, s7 -; GFX8V3-NEXT: s_add_u32 s0, s8, 8 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_addc_u32 s1, s9, 0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s1 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s4 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s5 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s10 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s11 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v3, s1 -; GFX8V3-NEXT: v_mov_b32_e32 v2, s0 -; GFX8V3-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_queue_ptr: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: v_mov_b32_e32 v0, s6 @@ -460,23 +315,6 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_queue_ptr: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: v_mov_b32_e32 v2, 0 -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: v_mov_b32_e32 v0, s10 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s11 -; GFX9V3-NEXT: ; kill: killed $sgpr6_sgpr7 -; GFX9V3-NEXT: ; kill: killed $sgpr4_sgpr5 -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_queue_ptr: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll 
deleted file mode 100644 index 20d0aea61f276..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll +++ /dev/null @@ -1,148 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s - -; CHECK-LABEL: {{^}}min_64_max_64: -; CHECK: SGPRBlocks: 0 -; CHECK: VGPRBlocks: 0 -; CHECK: NumSGPRsForWavesPerEU: 1 -; CHECK: NumVGPRsForWavesPerEU: 1 -define amdgpu_kernel void @min_64_max_64() #0 { -entry: - ret void -} -attributes #0 = {"amdgpu-flat-work-group-size"="64,64"} - -; CHECK-LABEL: {{^}}min_64_max_128: -; CHECK: SGPRBlocks: 0 -; CHECK: VGPRBlocks: 0 -; CHECK: NumSGPRsForWavesPerEU: 1 -; CHECK: NumVGPRsForWavesPerEU: 1 -define amdgpu_kernel void @min_64_max_128() #1 { -entry: - ret void -} -attributes #1 = {"amdgpu-flat-work-group-size"="64,128"} - -; CHECK-LABEL: {{^}}min_128_max_128: -; CHECK: SGPRBlocks: 0 -; CHECK: VGPRBlocks: 0 -; CHECK: NumSGPRsForWavesPerEU: 1 -; CHECK: NumVGPRsForWavesPerEU: 1 -define amdgpu_kernel void @min_128_max_128() #2 { -entry: - ret void -} -attributes #2 = {"amdgpu-flat-work-group-size"="128,128"} - -; CHECK-LABEL: {{^}}min_1024_max_1024 -; CHECK: SGPRBlocks: 0 -; CHECK: VGPRBlocks: 10 -; CHECK: NumSGPRsForWavesPerEU: 2{{$}} -; CHECK: NumVGPRsForWavesPerEU: 43 -@var = addrspace(1) global float 0.0 -define amdgpu_kernel void @min_1024_max_1024() #3 { - %val0 = load volatile float, ptr addrspace(1) @var - %val1 = load volatile float, ptr addrspace(1) @var - %val2 = load volatile float, ptr addrspace(1) @var - %val3 = load volatile float, ptr addrspace(1) @var - %val4 = load volatile float, ptr addrspace(1) @var - %val5 = load volatile float, ptr addrspace(1) @var - %val6 = load volatile float, ptr addrspace(1) @var - %val7 = load volatile float, ptr addrspace(1) @var - 
%val8 = load volatile float, ptr addrspace(1) @var - %val9 = load volatile float, ptr addrspace(1) @var - %val10 = load volatile float, ptr addrspace(1) @var - %val11 = load volatile float, ptr addrspace(1) @var - %val12 = load volatile float, ptr addrspace(1) @var - %val13 = load volatile float, ptr addrspace(1) @var - %val14 = load volatile float, ptr addrspace(1) @var - %val15 = load volatile float, ptr addrspace(1) @var - %val16 = load volatile float, ptr addrspace(1) @var - %val17 = load volatile float, ptr addrspace(1) @var - %val18 = load volatile float, ptr addrspace(1) @var - %val19 = load volatile float, ptr addrspace(1) @var - %val20 = load volatile float, ptr addrspace(1) @var - %val21 = load volatile float, ptr addrspace(1) @var - %val22 = load volatile float, ptr addrspace(1) @var - %val23 = load volatile float, ptr addrspace(1) @var - %val24 = load volatile float, ptr addrspace(1) @var - %val25 = load volatile float, ptr addrspace(1) @var - %val26 = load volatile float, ptr addrspace(1) @var - %val27 = load volatile float, ptr addrspace(1) @var - %val28 = load volatile float, ptr addrspace(1) @var - %val29 = load volatile float, ptr addrspace(1) @var - %val30 = load volatile float, ptr addrspace(1) @var - %val31 = load volatile float, ptr addrspace(1) @var - %val32 = load volatile float, ptr addrspace(1) @var - %val33 = load volatile float, ptr addrspace(1) @var - %val34 = load volatile float, ptr addrspace(1) @var - %val35 = load volatile float, ptr addrspace(1) @var - %val36 = load volatile float, ptr addrspace(1) @var - %val37 = load volatile float, ptr addrspace(1) @var - %val38 = load volatile float, ptr addrspace(1) @var - %val39 = load volatile float, ptr addrspace(1) @var - %val40 = load volatile float, ptr addrspace(1) @var - - store volatile float %val0, ptr addrspace(1) @var - store volatile float %val1, ptr addrspace(1) @var - store volatile float %val2, ptr addrspace(1) @var - store volatile float %val3, ptr addrspace(1) @var - store 
volatile float %val4, ptr addrspace(1) @var - store volatile float %val5, ptr addrspace(1) @var - store volatile float %val6, ptr addrspace(1) @var - store volatile float %val7, ptr addrspace(1) @var - store volatile float %val8, ptr addrspace(1) @var - store volatile float %val9, ptr addrspace(1) @var - store volatile float %val10, ptr addrspace(1) @var - store volatile float %val11, ptr addrspace(1) @var - store volatile float %val12, ptr addrspace(1) @var - store volatile float %val13, ptr addrspace(1) @var - store volatile float %val14, ptr addrspace(1) @var - store volatile float %val15, ptr addrspace(1) @var - store volatile float %val16, ptr addrspace(1) @var - store volatile float %val17, ptr addrspace(1) @var - store volatile float %val18, ptr addrspace(1) @var - store volatile float %val19, ptr addrspace(1) @var - store volatile float %val20, ptr addrspace(1) @var - store volatile float %val21, ptr addrspace(1) @var - store volatile float %val22, ptr addrspace(1) @var - store volatile float %val23, ptr addrspace(1) @var - store volatile float %val24, ptr addrspace(1) @var - store volatile float %val25, ptr addrspace(1) @var - store volatile float %val26, ptr addrspace(1) @var - store volatile float %val27, ptr addrspace(1) @var - store volatile float %val28, ptr addrspace(1) @var - store volatile float %val29, ptr addrspace(1) @var - store volatile float %val30, ptr addrspace(1) @var - store volatile float %val31, ptr addrspace(1) @var - store volatile float %val32, ptr addrspace(1) @var - store volatile float %val33, ptr addrspace(1) @var - store volatile float %val34, ptr addrspace(1) @var - store volatile float %val35, ptr addrspace(1) @var - store volatile float %val36, ptr addrspace(1) @var - store volatile float %val37, ptr addrspace(1) @var - store volatile float %val38, ptr addrspace(1) @var - store volatile float %val39, ptr addrspace(1) @var - store volatile float %val40, ptr addrspace(1) @var - - ret void -} -attributes #3 = 
{"amdgpu-flat-work-group-size"="1024,1024"} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} - -; CHECK: amdhsa.kernels: -; CHECK: .max_flat_workgroup_size: 64 -; CHECK: .name: min_64_max_64 -; CHECK: .max_flat_workgroup_size: 128 -; CHECK: .name: min_64_max_128 -; CHECK: .max_flat_workgroup_size: 128 -; CHECK: .name: min_128_max_128 -; CHECK: .max_flat_workgroup_size: 1024 -; CHECK: .name: min_1024_max_1024 -; CHECK: amdhsa.version: -; CHECK: - 1 -; CHECK: - 0 - -; PARSER: AMDGPU HSA Metadata Parser Test: PASS diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll deleted file mode 100644 index 6c553e3726abf..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll +++ /dev/null @@ -1,168 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=V3-GFX600 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=V3-GFX600 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx601 < %s | FileCheck --check-prefixes=V3-GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=pitcairn < %s | FileCheck --check-prefixes=V3-GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=verde < %s | FileCheck --check-prefixes=V3-GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx602 < %s | FileCheck --check-prefixes=V3-GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hainan < %s | FileCheck --check-prefixes=V3-GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=oland < %s | FileCheck --check-prefixes=V3-GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=V3-GFX700 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck --check-prefixes=V3-GFX700 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx701 < %s | FileCheck --check-prefixes=V3-GFX701 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck 
--check-prefixes=V3-GFX701 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx702 < %s | FileCheck --check-prefixes=V3-GFX702 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck --check-prefixes=V3-GFX703 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kabini < %s | FileCheck --check-prefixes=V3-GFX703 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=mullins < %s | FileCheck --check-prefixes=V3-GFX703 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck --check-prefixes=V3-GFX704 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire < %s | FileCheck --check-prefixes=V3-GFX704 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx705 < %s | FileCheck --check-prefixes=V3-GFX705 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 < %s | FileCheck --check-prefixes=V3-GFX802 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland < %s | FileCheck --check-prefixes=V3-GFX802 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga < %s | FileCheck --check-prefixes=V3-GFX802 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck --check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=polaris10 < %s | FileCheck 
--check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=polaris11 < %s | FileCheck --check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx805 < %s | FileCheck --check-prefixes=V3-GFX805 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tongapro < %s | FileCheck --check-prefixes=V3-GFX805 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX900-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX902-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX904-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 
-mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck 
--check-prefixes=V3-GFX908-NOSRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX909-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX940-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1010-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa 
-mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1011-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1013-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1033 < %s | FileCheck --check-prefixes=V3-GFX1033 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1034 < %s | FileCheck --check-prefixes=V3-GFX1034 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1035 < %s | FileCheck --check-prefixes=V3-GFX1035 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 < %s | FileCheck --check-prefixes=V3-GFX1036 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=V3-GFX1100 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1101 < %s | FileCheck --check-prefixes=V3-GFX1101 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1102 < %s | FileCheck --check-prefixes=V3-GFX1102 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 < %s | FileCheck 
--check-prefixes=V3-GFX1103 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1150 < %s | FileCheck --check-prefixes=V3-GFX1150 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 < %s | FileCheck --check-prefixes=V3-GFX1151 %s - -; V3-GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" -; V3-GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" -; V3-GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602" -; V3-GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" -; V3-GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701" -; V3-GFX702: .amdgcn_target "amdgcn-amd-amdhsa--gfx702" -; V3-GFX703: .amdgcn_target "amdgcn-amd-amdhsa--gfx703" -; V3-GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704" -; V3-GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705" -; V3-GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801" -; V3-GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801+xnack" -; V3-GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802" -; V3-GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803" -; V3-GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805" -; V3-GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810" -; V3-GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack" -; V3-GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" -; V3-GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" -; V3-GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902" -; V3-GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack" -; V3-GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904" -; V3-GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" -; V3-GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" -; V3-GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc" -; V3-GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack" -; V3-GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc" -; V3-GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908" -; 
V3-GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+sram-ecc" -; V3-GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack" -; V3-GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack+sram-ecc" -; V3-GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909" -; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack" -; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c" -; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack" -; V3-GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+sram-ecc" -; V3-GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc" -; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010" -; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" -; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011" -; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack" -; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012" -; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack" -; V3-GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013" -; V3-GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013+xnack" -; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030" -; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031" -; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032" -; V3-GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033" -; V3-GFX1034: .amdgcn_target "amdgcn-amd-amdhsa--gfx1034" -; V3-GFX1035: .amdgcn_target "amdgcn-amd-amdhsa--gfx1035" -; V3-GFX1036: .amdgcn_target "amdgcn-amd-amdhsa--gfx1036" -; V3-GFX1100: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100" -; V3-GFX1101: .amdgcn_target "amdgcn-amd-amdhsa--gfx1101" -; V3-GFX1102: .amdgcn_target "amdgcn-amd-amdhsa--gfx1102" -; V3-GFX1103: .amdgcn_target "amdgcn-amd-amdhsa--gfx1103" -; V3-GFX1150: .amdgcn_target "amdgcn-amd-amdhsa--gfx1150" -; V3-GFX1151: .amdgcn_target "amdgcn-amd-amdhsa--gfx1151" - - - -define 
amdgpu_kernel void @directive_amdgcn_target() { - ret void -} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll similarity index 98% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll index 37b124e7f59a0..042abe382283a 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll @@ -142,7 +142,7 @@ define amdgpu_kernel void @test_no_default_queue(i8 %a) #3 ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 ; CHECK-NOT: amdhsa.printf: attributes #0 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" } @@ -151,7 +151,7 @@ attributes #2 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-implici attributes #3 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="48" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} !1 = !{i32 0} !2 = !{!"none"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll index 8e8023aa16f13..fb08fd2c45085 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll @@ -39,4 +39,4 @@ define internal void @bar.5() { ; PARSER: AMDGPU HSA Metadata Parser Test: PASS !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll 
similarity index 99% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll index 69efc47008e6a..dc3a6e8b633b2 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -1739,14 +1739,14 @@ define amdgpu_kernel void @unknown_addrspace_kernarg(ptr addrspace(12345) %ptr) ; CHECK-NEXT: - '2:1:8:%g\n' ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 attributes #0 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" } attributes #1 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} !llvm.printf.fmts = !{!100, !101} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll similarity index 99% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll index 47b882494c919..f4892ebdc9c93 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll @@ -286,7 +286,7 @@ entry: ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 ; We don't have a use of llvm.amdgcn.implicitarg.ptr, so optnone to ; avoid optimizing out the implicit argument allocation. 
@@ -298,4 +298,4 @@ attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll similarity index 96% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll index cb3ae289721bc..22c6e14776220 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll @@ -35,12 +35,12 @@ define amdgpu_kernel void @test_kernel(i8 %a) #0 ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 attributes #0 = { sanitize_address "amdgpu-implicitarg-num-bytes"="48" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} !1 = !{i32 0} !2 = !{!"none"} !3 = !{!"char"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll similarity index 99% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll index a3f8c5cff95df..8f90025fe8e29 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll @@ -296,4 +296,4 @@ attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" } attributes #4 = { noinline } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll 
b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll similarity index 98% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll index b7f58bbb51bb2..6d49f22eb429b 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll @@ -96,10 +96,10 @@ define amdgpu_kernel void @test(ptr addrspace(1) %a, ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} !1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll similarity index 80% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll index 8117037baaffc..fc5e6e2731253 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll @@ -5,9 +5,9 @@ ; CHECK: --- ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 ; CHECK: ... 
!opencl.ocl.version = !{} !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll similarity index 81% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll index ea744863a9b88..1ec79c95bc2a3 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll @@ -5,10 +5,10 @@ ; CHECK: --- ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 ; CHECK: ... !opencl.ocl.version = !{!0} !llvm.module.flags = !{!1} !0 = !{i32 1} -!1 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll similarity index 99% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll index d6f7a92af9dcb..e45c4d1786faf 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll @@ -158,11 +158,11 @@ define amdgpu_kernel void @num_spilled_vgprs() #1 { ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 attributes #0 = { "amdgpu-num-sgpr"="14" } attributes #1 = { "amdgpu-num-vgpr"="20" } attributes #2 = { "amdgpu-flat-work-group-size"="1,256" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll 
b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll index 9760e93eb48e6..9e6c0ef86906d 100644 --- a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll @@ -1,36 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) { -; GFX8V3-LABEL: addrspacecast: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; GFX8V3-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40 -; GFX8V3-NEXT: v_mov_b32_e32 v4, 1 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1 -; GFX8V3-NEXT: s_cselect_b32 s3, s3, 0 -; GFX8V3-NEXT: s_cselect_b32 s0, s0, 0 -; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1 -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s3 -; GFX8V3-NEXT: s_cselect_b32 s0, s2, 0 -; GFX8V3-NEXT: s_cselect_b32 s1, s1, 0 -; GFX8V3-NEXT: v_mov_b32_e32 v2, s1 -; GFX8V3-NEXT: v_mov_b32_e32 v3, s0 -; GFX8V3-NEXT: flat_store_dword v[0:1], v4 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, 2 -; GFX8V3-NEXT: flat_store_dword v[2:3], v0 -; GFX8V3-NEXT: s_waitcnt 
vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: addrspacecast: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -77,30 +52,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: addrspacecast: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base -; GFX9V3-NEXT: s_mov_b64 s[4:5], src_shared_base -; GFX9V3-NEXT: v_mov_b32_e32 v4, 1 -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1 -; GFX9V3-NEXT: s_cselect_b32 s2, s3, 0 -; GFX9V3-NEXT: s_cselect_b32 s0, s0, 0 -; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1 -; GFX9V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s2 -; GFX9V3-NEXT: s_cselect_b32 s0, s5, 0 -; GFX9V3-NEXT: s_cselect_b32 s1, s1, 0 -; GFX9V3-NEXT: v_mov_b32_e32 v2, s1 -; GFX9V3-NEXT: v_mov_b32_e32 v3, s0 -; GFX9V3-NEXT: flat_store_dword v[0:1], v4 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: v_mov_b32_e32 v0, 2 -; GFX9V3-NEXT: flat_store_dword v[2:3], v0 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: addrspacecast: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -156,18 +107,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr } define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_is_shared: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x40 -; GFX8V3-NEXT: s_load_dword s1, s[6:7], 0x4 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V3-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GFX8V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GFX8V3-NEXT: flat_store_dword v[0:1], v0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_is_shared: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x40 @@ -192,18 +131,6 @@ define 
amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_is_shared: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dword s2, s[4:5], 0x4 -; GFX9V3-NEXT: s_mov_b64 s[0:1], src_shared_base -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_cmp_eq_u32 s2, s1 -; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GFX9V3-NEXT: global_store_dword v[0:1], v0, off -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_is_shared: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dword s2, s[4:5], 0x4 @@ -234,18 +161,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { } define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_is_private: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x44 -; GFX8V3-NEXT: s_load_dword s1, s[6:7], 0x4 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V3-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GFX8V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GFX8V3-NEXT: flat_store_dword v[0:1], v0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_is_private: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x44 @@ -270,18 +185,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_is_private: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dword s2, s[4:5], 0x4 -; GFX9V3-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_cmp_eq_u32 s2, s1 -; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GFX9V3-NEXT: global_store_dword v[0:1], v0, off -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_is_private: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dword 
s2, s[4:5], 0x4 @@ -312,11 +215,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { } define amdgpu_kernel void @llvm_trap() { -; GFX8V3-LABEL: llvm_trap: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; GFX8V3-NEXT: s_trap 2 -; ; GFX8V4-LABEL: llvm_trap: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_mov_b64 s[0:1], s[4:5] @@ -328,11 +226,6 @@ define amdgpu_kernel void @llvm_trap() { ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0) ; GFX8V5-NEXT: s_trap 2 ; -; GFX9V3-LABEL: llvm_trap: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; GFX9V3-NEXT: s_trap 2 -; ; GFX9V4-LABEL: llvm_trap: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_trap 2 @@ -345,10 +238,6 @@ define amdgpu_kernel void @llvm_trap() { } define amdgpu_kernel void @llvm_debugtrap() { -; GFX8V3-LABEL: llvm_debugtrap: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_trap 3 -; ; GFX8V4-LABEL: llvm_debugtrap: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_trap 3 @@ -357,10 +246,6 @@ define amdgpu_kernel void @llvm_debugtrap() { ; GFX8V5: ; %bb.0: ; GFX8V5-NEXT: s_trap 3 ; -; GFX9V3-LABEL: llvm_debugtrap: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_trap 3 -; ; GFX9V4-LABEL: llvm_debugtrap: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_trap 3 @@ -373,31 +258,6 @@ define amdgpu_kernel void @llvm_debugtrap() { } define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_queue_ptr: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: v_mov_b32_e32 v0, s6 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s7 -; GFX8V3-NEXT: s_add_u32 s0, s8, 8 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_addc_u32 s1, s9, 0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s1 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s4 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s5 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX8V3-NEXT: v_mov_b32_e32 v2, s10 -; 
GFX8V3-NEXT: v_mov_b32_e32 v3, s11 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s1 -; GFX8V3-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_queue_ptr: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: v_mov_b32_e32 v0, s6 @@ -446,23 +306,6 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_queue_ptr: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: v_mov_b32_e32 v2, 0 -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: v_mov_b32_e32 v0, s10 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s11 -; GFX9V3-NEXT: ; kill: killed $sgpr6_sgpr7 -; GFX9V3-NEXT: ; kill: killed $sgpr4_sgpr5 -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_queue_ptr: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll index 0353e7ee49ab9..a04fe28dbffff 100644 --- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll +++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll @@ -1,17 +1,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA %s declare void @llvm.trap() #0 -; HSA: .amdhsa_kernel trap -; HSA-NEXT: 
.amdhsa_group_segment_fixed_size 0 -; HSA-NEXT: .amdhsa_private_segment_fixed_size 0 -; HSA-NEXT: .amdhsa_kernarg_size 8 -; HSA-NEXT: .amdhsa_user_sgpr_count 8 -; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 -; HSA: .end_amdhsa_kernel - ; DOORBELL: .amdhsa_kernel trap ; DOORBELL-NEXT: .amdhsa_group_segment_fixed_size 0 ; DOORBELL-NEXT: .amdhsa_private_segment_fixed_size 0 diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll index 792ec2675247f..9ed896c148e64 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -317,4 +317,4 @@ attributes #1 = { nounwind "stackrealign" } attributes #2 = { nounwind alignstack=128 } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll index 03ea582698486..54a15513cf0a5 100644 --- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll +++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -1,101 +1,54 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 
%s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s +; RUN: llc %s -o - -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900 %s +; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803 %s +; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900 %s +; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900 %s declare void @llvm.trap() #0 declare void @llvm.debugtrap() #1 define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) { -; NOHSA-TRAP-GFX900-V3-LABEL: trap: -; NOHSA-TRAP-GFX900-V3: ; %bb.0: -; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm -; -; NOHSA-TRAP-GFX900-V4-LABEL: trap: -; NOHSA-TRAP-GFX900-V4: ; %bb.0: -; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; 
NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm -; -; HSA-TRAP-GFX803-V3-LABEL: trap: -; HSA-TRAP-GFX803-V3: ; %bb.0: -; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1 -; HSA-TRAP-GFX803-V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s2 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s3 -; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: s_trap 2 -; -; HSA-TRAP-GFX803-V4-LABEL: trap: -; HSA-TRAP-GFX803-V4: ; %bb.0: -; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1 -; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s2 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s3 -; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: s_trap 2 -; -; HSA-TRAP-GFX900-V3-LABEL: trap: -; HSA-TRAP-GFX900-V3: ; %bb.0: -; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[2:3] -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: s_trap 2 -; -; HSA-TRAP-GFX900-V4-LABEL: trap: -; HSA-TRAP-GFX900-V4: ; %bb.0: -; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; 
HSA-TRAP-GFX900-V4-NEXT: s_trap 2 -; -; HSA-NOTRAP-GFX900-V3-LABEL: trap: -; HSA-NOTRAP-GFX900-V3: ; %bb.0: -; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm -; -; HSA-NOTRAP-GFX900-V4-LABEL: trap: -; HSA-NOTRAP-GFX900-V4: ; %bb.0: -; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-LABEL: trap: +; NOHSA-TRAP-GFX900: ; %bb.0: +; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-LABEL: trap: +; HSA-TRAP-GFX803: ; %bb.0: +; HSA-TRAP-GFX803-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v2, 1 +; HSA-TRAP-GFX803-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s2 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s3 +; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-NEXT: s_trap 2 +; +; HSA-TRAP-GFX900-LABEL: trap: +; HSA-TRAP-GFX900: ; %bb.0: +; HSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; 
HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-NEXT: s_trap 2 +; +; HSA-NOTRAP-GFX900-LABEL: trap: +; HSA-NOTRAP-GFX900: ; %bb.0: +; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: s_endpgm store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.trap() unreachable @@ -104,150 +57,77 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) { } define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; NOHSA-TRAP-GFX900-V3-LABEL: non_entry_trap: -; NOHSA-TRAP-GFX900-V3: ; %bb.0: ; %entry -; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; NOHSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2 -; NOHSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 -; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm -; NOHSA-TRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap -; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm -; -; NOHSA-TRAP-GFX900-V4-LABEL: non_entry_trap: -; NOHSA-TRAP-GFX900-V4: ; %bb.0: ; %entry -; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt 
lgkmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; NOHSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2 -; NOHSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 -; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm -; NOHSA-TRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap -; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm -; -; HSA-TRAP-GFX803-V3-LABEL: non_entry_trap: -; HSA-TRAP-GFX803-V3: ; %bb.0: ; %entry -; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V3-NEXT: flat_load_dword v0, v[0:1] glc -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 -; HSA-TRAP-GFX803-V3-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-TRAP-GFX803-V3-NEXT: ; %bb.1: ; %ret -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 3 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: s_endpgm -; HSA-TRAP-GFX803-V3-NEXT: .LBB1_2: ; %trap -; HSA-TRAP-GFX803-V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX803-V3-NEXT: s_trap 2 -; -; HSA-TRAP-GFX803-V4-LABEL: non_entry_trap: -; HSA-TRAP-GFX803-V4: ; %bb.0: ; %entry -; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V4-NEXT: flat_load_dword v0, v[0:1] glc -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: 
v_cmp_eq_u32_e32 vcc, -1, v0 -; HSA-TRAP-GFX803-V4-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-TRAP-GFX803-V4-NEXT: ; %bb.1: ; %ret -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 3 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: s_endpgm -; HSA-TRAP-GFX803-V4-NEXT: .LBB1_2: ; %trap -; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX803-V4-NEXT: s_trap 2 -; -; HSA-TRAP-GFX900-V3-LABEL: non_entry_trap: -; HSA-TRAP-GFX900-V3: ; %bb.0: ; %entry -; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 -; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: s_endpgm -; HSA-TRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap -; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX900-V3-NEXT: s_trap 2 -; -; HSA-TRAP-GFX900-V4-LABEL: non_entry_trap: -; HSA-TRAP-GFX900-V4: ; %bb.0: ; %entry -; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 -; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, 
v1, s[0:1] -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: s_endpgm -; HSA-TRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap -; HSA-TRAP-GFX900-V4-NEXT: s_trap 2 -; -; HSA-NOTRAP-GFX900-V3-LABEL: non_entry_trap: -; HSA-NOTRAP-GFX900-V3: ; %bb.0: ; %entry -; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-NOTRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-NOTRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 -; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm -; HSA-NOTRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap -; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm -; -; HSA-NOTRAP-GFX900-V4-LABEL: non_entry_trap: -; HSA-NOTRAP-GFX900-V4: ; %bb.0: ; %entry -; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-NOTRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-NOTRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 -; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm -; HSA-NOTRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap -; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-LABEL: non_entry_trap: +; NOHSA-TRAP-GFX900: ; %bb.0: ; %entry +; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-NEXT: 
v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; NOHSA-TRAP-GFX900-NEXT: s_cbranch_vccz .LBB1_2 +; NOHSA-TRAP-GFX900-NEXT: ; %bb.1: ; %ret +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 3 +; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-NEXT: .LBB1_2: ; %trap +; NOHSA-TRAP-GFX900-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-LABEL: non_entry_trap: +; HSA-TRAP-GFX803: ; %bb.0: ; %entry +; HSA-TRAP-GFX803-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-NEXT: flat_load_dword v0, v[0:1] glc +; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 +; HSA-TRAP-GFX803-NEXT: s_cbranch_vccz .LBB1_2 +; HSA-TRAP-GFX803-NEXT: ; %bb.1: ; %ret +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v2, 3 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-NEXT: s_endpgm +; HSA-TRAP-GFX803-NEXT: .LBB1_2: ; %trap +; HSA-TRAP-GFX803-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX803-NEXT: s_trap 2 +; +; HSA-TRAP-GFX900-LABEL: non_entry_trap: +; HSA-TRAP-GFX900: ; %bb.0: ; %entry +; HSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc +; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; HSA-TRAP-GFX900-NEXT: s_cbranch_vccz .LBB1_2 +; HSA-TRAP-GFX900-NEXT: ; %bb.1: ; %ret 
+; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 3 +; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-NEXT: s_endpgm +; HSA-TRAP-GFX900-NEXT: .LBB1_2: ; %trap +; HSA-TRAP-GFX900-NEXT: s_trap 2 +; +; HSA-NOTRAP-GFX900-LABEL: non_entry_trap: +; HSA-NOTRAP-GFX900: ; %bb.0: ; %entry +; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; HSA-NOTRAP-GFX900-NEXT: s_cbranch_vccz .LBB1_2 +; HSA-NOTRAP-GFX900-NEXT: ; %bb.1: ; %ret +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v1, 3 +; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: s_endpgm +; HSA-NOTRAP-GFX900-NEXT: .LBB1_2: ; %trap +; HSA-NOTRAP-GFX900-NEXT: s_endpgm entry: %tmp29 = load volatile i32, ptr addrspace(1) %arg0 %cmp = icmp eq i32 %tmp29, -1 @@ -263,115 +143,60 @@ ret: } define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0) { -; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap: -; NOHSA-TRAP-GFX900-V3: ; %bb.0: -; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2 -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1] -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm -; -; NOHSA-TRAP-GFX900-V4-LABEL: debugtrap: -; NOHSA-TRAP-GFX900-V4: ; %bb.0: -; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; 
NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2 -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1] -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm -; -; HSA-TRAP-GFX803-V3-LABEL: debugtrap: -; HSA-TRAP-GFX803-V3: ; %bb.0: -; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v3, 2 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: s_trap 3 -; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v3 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: s_endpgm -; -; HSA-TRAP-GFX803-V4-LABEL: debugtrap: -; HSA-TRAP-GFX803-V4: ; %bb.0: -; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v3, 2 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: s_trap 3 -; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v3 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: s_endpgm -; -; HSA-TRAP-GFX900-V3-LABEL: debugtrap: -; HSA-TRAP-GFX900-V3: ; %bb.0: -; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V3-NEXT: 
v_mov_b32_e32 v1, 1 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2 -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: s_trap 3 -; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1] -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: s_endpgm -; -; HSA-TRAP-GFX900-V4-LABEL: debugtrap: -; HSA-TRAP-GFX900-V4: ; %bb.0: -; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2 -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: s_trap 3 -; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1] -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: s_endpgm -; -; HSA-NOTRAP-GFX900-V3-LABEL: debugtrap: -; HSA-NOTRAP-GFX900-V3: ; %bb.0: -; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2 -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1] -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm -; -; HSA-NOTRAP-GFX900-V4-LABEL: debugtrap: -; HSA-NOTRAP-GFX900-V4: ; %bb.0: -; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2 -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; 
HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1] -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-LABEL: debugtrap: +; NOHSA-TRAP-GFX900: ; %bb.0: +; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v2, 2 +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v2, s[0:1] +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-LABEL: debugtrap: +; HSA-TRAP-GFX803: ; %bb.0: +; HSA-TRAP-GFX803-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v2, 1 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v3, 2 +; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-NEXT: s_trap 3 +; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v3 +; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-NEXT: s_endpgm +; +; HSA-TRAP-GFX900-LABEL: debugtrap: +; HSA-TRAP-GFX900: ; %bb.0: +; HSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v2, 2 +; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-NEXT: s_trap 3 +; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v2, s[0:1] +; HSA-TRAP-GFX900-NEXT: s_waitcnt 
vmcnt(0) +; HSA-TRAP-GFX900-NEXT: s_endpgm +; +; HSA-NOTRAP-GFX900-LABEL: debugtrap: +; HSA-NOTRAP-GFX900: ; %bb.0: +; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v2, 2 +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v2, s[0:1] +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: s_endpgm store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.debugtrap() store volatile i32 2, ptr addrspace(1) %arg0 @@ -382,4 +207,4 @@ attributes #0 = { nounwind noreturn } attributes #1 = { nounwind } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v3.s b/llvm/test/MC/AMDGPU/hsa-diag-v4.s similarity index 94% rename from llvm/test/MC/AMDGPU/hsa-diag-v3.s rename to llvm/test/MC/AMDGPU/hsa-diag-v4.s index 369ac905ad2b2..f7a554aedb746 100644 --- a/llvm/test/MC/AMDGPU/hsa-diag-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-diag-v4.s @@ -1,18 +1,18 @@ -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,PREGFX10,AMDHSA -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX10,AMDHSA -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX11,AMDHSA -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack 
-show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,PREGFX10,AMDHSA,ALL +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,PREGFX10,AMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX10,AMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX11,AMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,PREGFX10,AMDHSA,ALL .text // GCN-LABEL: warning: test_target // GFX8-NOT: error: -// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1010+xnack -// GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1100 -// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-unknown--gfx810 +// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1010:xnack+ +// GFX11: error: .amdgcn_target directive's target id 
amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1100 +// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-unknown--gfx810 .warning "test_target" -.amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack" +.amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+" // GCN-LABEL: warning: test_amdhsa_kernel_no_name // GCN: error: unknown directive diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s deleted file mode 100644 index ba60000837cdc..0000000000000 --- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s +++ /dev/null @@ -1,226 +0,0 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t -// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s -// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s - -// READOBJ: Section Headers -// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 - -// READOBJ: Relocation section '.rela.rodata' at offset -// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 -// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 -// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210 - -// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: -// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal -// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete -// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr -// READOBJ-NEXT: 0000000000000000 64 
OBJECT LOCAL DEFAULT 3 minimal.kd -// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd -// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd - -// OBJDUMP: Contents of section .rodata -// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. -// minimal -// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000 -// complete -// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 -// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000 -// special_sgpr -// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000 - -.text -// ASM: .text - -.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" - -.p2align 8 -.type minimal,@function -minimal: - s_endpgm - -.p2align 8 -.type complete,@function -complete: - s_endpgm - -.p2align 8 -.type special_sgpr,@function -special_sgpr: - s_endpgm - -.rodata -// ASM: .rodata - -// Test that only specifying required directives is allowed, and that defaulted -// values are omitted. -.p2align 6 -.amdhsa_kernel minimal - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 - .amdhsa_shared_vgpr_count 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel minimal -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM: .amdhsa_shared_vgpr_count 0 -// ASM: .end_amdhsa_kernel - -// Test that we can specify all available directives with non-default values. 
-.p2align 6 -.amdhsa_kernel complete - .amdhsa_group_segment_fixed_size 1 - .amdhsa_private_segment_fixed_size 1 - .amdhsa_kernarg_size 8 - .amdhsa_user_sgpr_private_segment_buffer 1 - .amdhsa_user_sgpr_dispatch_ptr 1 - .amdhsa_user_sgpr_queue_ptr 1 - .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_dispatch_id 1 - .amdhsa_user_sgpr_flat_scratch_init 1 - .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_wavefront_size32 1 - .amdhsa_system_sgpr_private_segment_wavefront_offset 1 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 1 - .amdhsa_system_sgpr_workgroup_id_z 1 - .amdhsa_system_sgpr_workgroup_info 1 - .amdhsa_system_vgpr_workitem_id 1 - .amdhsa_next_free_vgpr 9 - .amdhsa_next_free_sgpr 27 - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 1 - .amdhsa_float_round_mode_32 1 - .amdhsa_float_round_mode_16_64 1 - .amdhsa_float_denorm_mode_32 1 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 1 - .amdhsa_workgroup_processor_mode 1 - .amdhsa_memory_ordered 1 - .amdhsa_forward_progress 1 - .amdhsa_exception_fp_ieee_invalid_op 1 - .amdhsa_exception_fp_denorm_src 1 - .amdhsa_exception_fp_ieee_div_zero 1 - .amdhsa_exception_fp_ieee_overflow 1 - .amdhsa_exception_fp_ieee_underflow 1 - .amdhsa_exception_fp_ieee_inexact 1 - .amdhsa_exception_int_div_zero 1 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel complete -// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_kernarg_size 8 -// ASM-NEXT: .amdhsa_user_sgpr_count 15 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 -// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: 
.amdhsa_wavefront_size32 1 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 -// ASM-NEXT: .amdhsa_next_free_vgpr 9 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_flat_scratch 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 1 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM-NEXT: .amdhsa_fp16_overflow 1 -// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 -// ASM-NEXT: .amdhsa_memory_ordered 1 -// ASM-NEXT: .amdhsa_forward_progress 1 -// ASM-NEXT: .amdhsa_shared_vgpr_count 0 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 -// ASM-NEXT: .amdhsa_exception_int_div_zero 1 -// ASM-NEXT: .end_amdhsa_kernel - -// Test that we are including special SGPR usage in the granulated count. -.p2align 6 -.amdhsa_kernel special_sgpr - // Same next_free_sgpr as "complete", but... 
- .amdhsa_next_free_sgpr 27 - // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from - // 3 granules to 4 - .amdhsa_reserve_flat_scratch 1 - - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_xnack_mask 1 - - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_next_free_vgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel special_sgpr -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM: .end_amdhsa_kernel - -.section .foo - -.byte .amdgcn.gfx_generation_number -// ASM: .byte 10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v7, s10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 8 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 11 - -.set .amdgcn.next_free_vgpr, 0 -.set .amdgcn.next_free_sgpr, 0 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v16, s3 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 17 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 4 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s deleted file mode 100644 index 7f885b457aa63..0000000000000 --- a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s +++ /dev/null @@ -1,213 +0,0 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 --amdhsa-code-object-version=3 -filetype=obj < %s > %t -// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s -// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s - -// READOBJ: Section Headers -// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 
256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 - -// READOBJ: Relocation section '.rela.rodata' at offset -// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 -// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 -// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210 - -// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: -// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal -// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete -// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr -// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd -// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd -// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd - -// OBJDUMP: Contents of section .rodata -// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. 
-// minimal -// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000 -// complete -// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 -// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e040000 00000000 -// special_sgpr -// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000 - -.text -// ASM: .text - -.amdgcn_target "amdgcn-amd-amdhsa--gfx1100" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100" - -.p2align 8 -.type minimal,@function -minimal: - s_endpgm - -.p2align 8 -.type complete,@function -complete: - s_endpgm - -.p2align 8 -.type special_sgpr,@function -special_sgpr: - s_endpgm - -.rodata -// ASM: .rodata - -// Test that only specifying required directives is allowed, and that defaulted -// values are omitted. -.p2align 6 -.amdhsa_kernel minimal - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel minimal -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM: .end_amdhsa_kernel - -// Test that we can specify all available directives with non-default values. 
-.p2align 6 -.amdhsa_kernel complete - .amdhsa_group_segment_fixed_size 1 - .amdhsa_private_segment_fixed_size 1 - .amdhsa_kernarg_size 8 - .amdhsa_user_sgpr_dispatch_ptr 1 - .amdhsa_user_sgpr_queue_ptr 1 - .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_dispatch_id 1 - .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_wavefront_size32 1 - .amdhsa_enable_private_segment 1 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 1 - .amdhsa_system_sgpr_workgroup_id_z 1 - .amdhsa_system_sgpr_workgroup_info 1 - .amdhsa_system_vgpr_workitem_id 1 - .amdhsa_next_free_vgpr 9 - .amdhsa_next_free_sgpr 27 - .amdhsa_reserve_vcc 0 - .amdhsa_float_round_mode_32 1 - .amdhsa_float_round_mode_16_64 1 - .amdhsa_float_denorm_mode_32 1 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 1 - .amdhsa_workgroup_processor_mode 1 - .amdhsa_memory_ordered 1 - .amdhsa_forward_progress 1 - .amdhsa_exception_fp_ieee_invalid_op 1 - .amdhsa_exception_fp_denorm_src 1 - .amdhsa_exception_fp_ieee_div_zero 1 - .amdhsa_exception_fp_ieee_overflow 1 - .amdhsa_exception_fp_ieee_underflow 1 - .amdhsa_exception_fp_ieee_inexact 1 - .amdhsa_exception_int_div_zero 1 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel complete -// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_kernarg_size 8 -// ASM-NEXT: .amdhsa_user_sgpr_count 9 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_wavefront_size32 1 -// ASM-NEXT: .amdhsa_enable_private_segment 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 -// ASM-NEXT: .amdhsa_next_free_vgpr 9 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_float_round_mode_32 1 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM-NEXT: .amdhsa_fp16_overflow 1 -// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 -// ASM-NEXT: .amdhsa_memory_ordered 1 -// ASM-NEXT: .amdhsa_forward_progress 1 -// ASM-NEXT: .amdhsa_shared_vgpr_count 0 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 -// ASM-NEXT: .amdhsa_exception_int_div_zero 1 -// ASM-NEXT: .end_amdhsa_kernel - -// Test that we are including special SGPR usage in the granulated count. -.p2align 6 -.amdhsa_kernel special_sgpr - // Same next_free_sgpr as "complete", but... 
- .amdhsa_next_free_sgpr 27 - // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from - // 3 granules to 4 - - .amdhsa_reserve_vcc 0 - - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_next_free_vgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel special_sgpr -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM: .end_amdhsa_kernel - -.section .foo - -.byte .amdgcn.gfx_generation_number -// ASM: .byte 11 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v7, s10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 8 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 11 - -.set .amdgcn.next_free_vgpr, 0 -.set .amdgcn.next_free_sgpr, 0 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v16, s3 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 17 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 4 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s deleted file mode 100644 index fd84fab8af816..0000000000000 --- a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s +++ /dev/null @@ -1,184 +0,0 @@ -// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t -// RUN: llvm-readobj --elf-output-style=GNU --sections --symbols --relocations %t | FileCheck --check-prefix=READOBJ %s -// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s - -// READOBJ: Section Headers -// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} 
{{[0-9a-f]+}} 000080 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 - -// READOBJ: Relocation section '.rela.rodata' at offset -// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 -// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 - -// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: -// READOBJ-DAG: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete -// READOBJ-DAG: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd -// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal -// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd - -// OBJDUMP: Contents of section .rodata -// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. -// minimal -// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 -// complete -// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 -// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 -// OBJDUMP-NEXT: 0070 c1500104 210f007f 7f008100 00000000 - -.text -// ASM: .text - -.amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc" - -.p2align 8 -.type minimal,@function -minimal: - s_endpgm - -.p2align 8 -.type complete,@function -complete: - s_endpgm - -.rodata -// ASM: .rodata - -// Test that only specifying required directives is allowed, and that defaulted -// values are omitted. 
-.p2align 6 -.amdhsa_kernel minimal - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 - .amdhsa_accum_offset 4 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel minimal -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM-NEXT: .amdhsa_accum_offset 4 -// ASM: .amdhsa_tg_split 0 -// ASM: .end_amdhsa_kernel - -// Test that we can specify all available directives with non-default values. -.p2align 6 -.amdhsa_kernel complete - .amdhsa_group_segment_fixed_size 1 - .amdhsa_private_segment_fixed_size 1 - .amdhsa_user_sgpr_private_segment_buffer 1 - .amdhsa_user_sgpr_dispatch_ptr 1 - .amdhsa_user_sgpr_queue_ptr 1 - .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_dispatch_id 1 - .amdhsa_user_sgpr_flat_scratch_init 1 - .amdhsa_kernarg_size 8 - .amdhsa_user_sgpr_kernarg_preload_length 1 - .amdhsa_user_sgpr_kernarg_preload_offset 1 - .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_system_sgpr_private_segment_wavefront_offset 1 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 1 - .amdhsa_system_sgpr_workgroup_id_z 1 - .amdhsa_system_sgpr_workgroup_info 1 - .amdhsa_system_vgpr_workitem_id 1 - .amdhsa_next_free_vgpr 9 - .amdhsa_next_free_sgpr 27 - .amdhsa_accum_offset 4 - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_float_round_mode_32 1 - .amdhsa_float_round_mode_16_64 1 - .amdhsa_float_denorm_mode_32 1 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 1 - .amdhsa_tg_split 1 - .amdhsa_exception_fp_ieee_invalid_op 1 - .amdhsa_exception_fp_denorm_src 1 - .amdhsa_exception_fp_ieee_div_zero 1 - .amdhsa_exception_fp_ieee_overflow 1 - .amdhsa_exception_fp_ieee_underflow 1 - .amdhsa_exception_fp_ieee_inexact 1 - .amdhsa_exception_int_div_zero 1 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel complete -// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_kernarg_size 8 -// 
ASM-NEXT: .amdhsa_user_sgpr_count 16 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 -// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 -// ASM-NEXT: .amdhsa_next_free_vgpr 9 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_accum_offset 4 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_flat_scratch 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 1 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM-NEXT: .amdhsa_fp16_overflow 1 -// ASM-NEXT: .amdhsa_tg_split 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 -// ASM-NEXT: .amdhsa_exception_int_div_zero 1 -// ASM-NEXT: .end_amdhsa_kernel - -.section .foo - -.byte .amdgcn.gfx_generation_number -// ASM: .byte 9 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v7, s10 - -.byte 
.amdgcn.next_free_vgpr -// ASM: .byte 8 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 11 - -.set .amdgcn.next_free_vgpr, 0 -.set .amdgcn.next_free_sgpr, 0 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v16, s3 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 17 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 4 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s deleted file mode 100644 index 9624515ecd6fb..0000000000000 --- a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s +++ /dev/null @@ -1,178 +0,0 @@ -// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s > %t -// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s -// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s - -// READOBJ: Section Headers -// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000080 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 - -// READOBJ: Relocation section '.rela.rodata' at offset -// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 -// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 - -// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: -// READOBJ-DAG: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete -// READOBJ-DAG: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd -// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal -// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd - -// OBJDUMP: Contents of section .rodata -// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. 
-// minimal -// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 -// complete -// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 -// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 -// OBJDUMP-NEXT: 0070 01510104 150f007f 5e008100 00000000 - -.text -// ASM: .text - -.amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc" - -.p2align 8 -.type minimal,@function -minimal: - s_endpgm - -.p2align 8 -.type complete,@function -complete: - s_endpgm - -.rodata -// ASM: .rodata - -// Test that only specifying required directives is allowed, and that defaulted -// values are omitted. -.p2align 6 -.amdhsa_kernel minimal - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 - .amdhsa_accum_offset 4 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel minimal -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM-NEXT: .amdhsa_accum_offset 4 -// ASM: .amdhsa_tg_split 0 -// ASM: .end_amdhsa_kernel - -// Test that we can specify all available directives with non-default values. 
-.p2align 6 -.amdhsa_kernel complete - .amdhsa_group_segment_fixed_size 1 - .amdhsa_private_segment_fixed_size 1 - .amdhsa_user_sgpr_dispatch_ptr 1 - .amdhsa_user_sgpr_queue_ptr 1 - .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_dispatch_id 1 - .amdhsa_kernarg_size 8 - .amdhsa_user_sgpr_kernarg_preload_length 1 - .amdhsa_user_sgpr_kernarg_preload_offset 1 - .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_enable_private_segment 1 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 1 - .amdhsa_system_sgpr_workgroup_id_z 1 - .amdhsa_system_sgpr_workgroup_info 1 - .amdhsa_system_vgpr_workitem_id 1 - .amdhsa_next_free_vgpr 9 - .amdhsa_next_free_sgpr 27 - .amdhsa_accum_offset 4 - .amdhsa_reserve_vcc 0 - .amdhsa_float_round_mode_32 1 - .amdhsa_float_round_mode_16_64 1 - .amdhsa_float_denorm_mode_32 1 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 1 - .amdhsa_tg_split 1 - .amdhsa_exception_fp_ieee_invalid_op 1 - .amdhsa_exception_fp_denorm_src 1 - .amdhsa_exception_fp_ieee_div_zero 1 - .amdhsa_exception_fp_ieee_overflow 1 - .amdhsa_exception_fp_ieee_underflow 1 - .amdhsa_exception_fp_ieee_inexact 1 - .amdhsa_exception_int_div_zero 1 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel complete -// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_kernarg_size 8 -// ASM-NEXT: .amdhsa_user_sgpr_count 10 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_enable_private_segment 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 -// ASM-NEXT: 
.amdhsa_system_sgpr_workgroup_id_z 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 -// ASM-NEXT: .amdhsa_next_free_vgpr 9 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_accum_offset 4 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 1 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM-NEXT: .amdhsa_fp16_overflow 1 -// ASM-NEXT: .amdhsa_tg_split 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 -// ASM-NEXT: .amdhsa_exception_int_div_zero 1 -// ASM-NEXT: .end_amdhsa_kernel - -.section .foo - -.byte .amdgcn.gfx_generation_number -// ASM: .byte 9 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v7, s10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 8 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 11 - -.set .amdgcn.next_free_vgpr, 0 -.set .amdgcn.next_free_sgpr, 0 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v16, s3 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 17 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 4 diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s deleted file mode 100644 index 9f854986d7bc4..0000000000000 --- a/llvm/test/MC/AMDGPU/hsa-v3.s +++ /dev/null @@ -1,304 +0,0 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa 
-mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t -// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s -// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s - -// READOBJ: Section Headers -// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 - -// READOBJ: Relocation section '.rela.rodata' at offset -// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 -// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 -// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210 -// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310 - -// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: -// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal -// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete -// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr -// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr -// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd -// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd -// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd -// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd - -// OBJDUMP: Contents of section .rodata -// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. 
-// minimal -// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 -// complete -// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 -// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 -// special_sgpr -// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000 -// disabled_user_sgpr -// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000 - -.text -// ASM: .text - -.amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" - -.p2align 8 -.type minimal,@function -minimal: - s_endpgm - -.p2align 8 -.type complete,@function -complete: - s_endpgm - -.p2align 8 -.type special_sgpr,@function -special_sgpr: - s_endpgm - -.p2align 8 -.type disabled_user_sgpr,@function -disabled_user_sgpr: - s_endpgm - -.rodata -// ASM: .rodata - -// Test that only specifying required directives is allowed, and that defaulted -// values are omitted. -.p2align 6 -.amdhsa_kernel minimal - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel minimal -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM: .end_amdhsa_kernel - -// Test that we can specify all available directives with non-default values. 
-.p2align 6 -.amdhsa_kernel complete - .amdhsa_group_segment_fixed_size 1 - .amdhsa_private_segment_fixed_size 1 - .amdhsa_kernarg_size 8 - .amdhsa_user_sgpr_private_segment_buffer 1 - .amdhsa_user_sgpr_dispatch_ptr 1 - .amdhsa_user_sgpr_queue_ptr 1 - .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_dispatch_id 1 - .amdhsa_user_sgpr_flat_scratch_init 1 - .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_system_sgpr_private_segment_wavefront_offset 1 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 1 - .amdhsa_system_sgpr_workgroup_id_z 1 - .amdhsa_system_sgpr_workgroup_info 1 - .amdhsa_system_vgpr_workitem_id 1 - .amdhsa_next_free_vgpr 9 - .amdhsa_next_free_sgpr 27 - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 1 - .amdhsa_float_round_mode_32 1 - .amdhsa_float_round_mode_16_64 1 - .amdhsa_float_denorm_mode_32 1 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 1 - .amdhsa_exception_fp_ieee_invalid_op 1 - .amdhsa_exception_fp_denorm_src 1 - .amdhsa_exception_fp_ieee_div_zero 1 - .amdhsa_exception_fp_ieee_overflow 1 - .amdhsa_exception_fp_ieee_underflow 1 - .amdhsa_exception_fp_ieee_inexact 1 - .amdhsa_exception_int_div_zero 1 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel complete -// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_kernarg_size 8 -// ASM-NEXT: .amdhsa_user_sgpr_count 15 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 -// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 -// ASM-NEXT: 
.amdhsa_system_sgpr_workgroup_id_y 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 -// ASM-NEXT: .amdhsa_next_free_vgpr 9 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_flat_scratch 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 1 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM-NEXT: .amdhsa_fp16_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 -// ASM-NEXT: .amdhsa_exception_int_div_zero 1 -// ASM-NEXT: .end_amdhsa_kernel - -// Test that we are including special SGPR usage in the granulated count. -.p2align 6 -.amdhsa_kernel special_sgpr - // Same next_free_sgpr as "complete", but... 
- .amdhsa_next_free_sgpr 27 - // ...on GFX9 this should require an additional 6 SGPRs, pushing us from - // 3 granules to 4 - .amdhsa_reserve_flat_scratch 1 - - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_xnack_mask 1 - - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_next_free_vgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel special_sgpr -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM: .end_amdhsa_kernel - -// Test that explicitly disabling user_sgpr's does not affect the user_sgpr -// count, i.e. this should produce the same descriptor as minimal. -.p2align 6 -.amdhsa_kernel disabled_user_sgpr - .amdhsa_user_sgpr_private_segment_buffer 0 - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel disabled_user_sgpr -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM: .end_amdhsa_kernel - -.section .foo - -.byte .amdgcn.gfx_generation_number -// ASM: .byte 9 - -.byte .amdgcn.gfx_generation_minor -// ASM: .byte 0 - -.byte .amdgcn.gfx_generation_stepping -// ASM: .byte 4 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v7, s10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 8 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 11 - -.set .amdgcn.next_free_vgpr, 0 -.set .amdgcn.next_free_sgpr, 0 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v16, s3 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 17 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 4 - -// Metadata - -.amdgpu_metadata - amdhsa.version: - - 3 - - 0 - amdhsa.kernels: - - .name: amd_kernel_code_t_test_all - .symbol: amd_kernel_code_t_test_all@kd - 
.kernarg_segment_size: 8 - .group_segment_fixed_size: 16 - .private_segment_fixed_size: 32 - .kernarg_segment_align: 64 - .wavefront_size: 128 - .sgpr_count: 14 - .vgpr_count: 40 - .max_flat_workgroup_size: 256 - - .name: amd_kernel_code_t_minimal - .symbol: amd_kernel_code_t_minimal@kd - .kernarg_segment_size: 8 - .group_segment_fixed_size: 16 - .private_segment_fixed_size: 32 - .kernarg_segment_align: 64 - .wavefront_size: 128 - .sgpr_count: 14 - .vgpr_count: 40 - .max_flat_workgroup_size: 256 -.end_amdgpu_metadata - -// ASM: .amdgpu_metadata -// ASM: amdhsa.kernels: -// ASM: - .group_segment_fixed_size: 16 -// ASM: .kernarg_segment_align: 64 -// ASM: .kernarg_segment_size: 8 -// ASM: .max_flat_workgroup_size: 256 -// ASM: .name: amd_kernel_code_t_test_all -// ASM: .private_segment_fixed_size: 32 -// ASM: .sgpr_count: 14 -// ASM: .symbol: 'amd_kernel_code_t_test_all@kd' -// ASM: .vgpr_count: 40 -// ASM: .wavefront_size: 128 -// ASM: - .group_segment_fixed_size: 16 -// ASM: .kernarg_segment_align: 64 -// ASM: .kernarg_segment_size: 8 -// ASM: .max_flat_workgroup_size: 256 -// ASM: .name: amd_kernel_code_t_minimal -// ASM: .private_segment_fixed_size: 32 -// ASM: .sgpr_count: 14 -// ASM: .symbol: 'amd_kernel_code_t_minimal@kd' -// ASM: .vgpr_count: 40 -// ASM: .wavefront_size: 128 -// ASM: amdhsa.version: -// ASM-NEXT: - 3 -// ASM-NEXT: - 0 -// ASM: .end_amdgpu_metadata diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s index 63e532e0ffa37..7e3ae8424cc7b 100644 --- a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s +++ b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s .amdhsa_kernel implied_count_too_low_0 .amdhsa_user_sgpr_count 
0 diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count.s b/llvm/test/MC/AMDGPU/user-sgpr-count.s index aa8970185eb04..950c514f786b2 100644 --- a/llvm/test/MC/AMDGPU/user-sgpr-count.s +++ b/llvm/test/MC/AMDGPU/user-sgpr-count.s @@ -1,10 +1,10 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s .text // ASM: .text -.amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc" +.amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" // ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count From 7571f27ed768530f3fe9707d310c83d5a687ea16 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Mon, 16 Oct 2023 08:41:26 +0200 Subject: [PATCH 195/720] [lld] Restore "REQUIRES: amdgpu" in amdgpu-abi-version Accidentally deleted it in a previous commit. --- lld/test/ELF/amdgpu-abi-version.s | 1 + 1 file changed, 1 insertion(+) diff --git a/lld/test/ELF/amdgpu-abi-version.s b/lld/test/ELF/amdgpu-abi-version.s index 72b67fdaeb1a1..cda9f5aafa5ee 100644 --- a/lld/test/ELF/amdgpu-abi-version.s +++ b/lld/test/ELF/amdgpu-abi-version.s @@ -1,3 +1,4 @@ +# REQUIRES: amdgpu # RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -filetype=obj %s -o %t.o # RUN: ld.lld -shared %t.o -o %t.so # RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=COV4 %s From 33b58f3f2e7808658050847b4d7e8465bd14d076 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Mon, 16 Oct 2023 07:12:30 +0000 Subject: [PATCH 196/720] [BOLT] Move X86-specific test to X86 subdirectory (#68992) It only works when the X86 target is available. 
--- bolt/test/{ => X86}/checkvma-large-section.test | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename bolt/test/{ => X86}/checkvma-large-section.test (100%) diff --git a/bolt/test/checkvma-large-section.test b/bolt/test/X86/checkvma-large-section.test similarity index 100% rename from bolt/test/checkvma-large-section.test rename to bolt/test/X86/checkvma-large-section.test From 5c0931727eb045c2ed2828d070eb16d4ac87b933 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Mon, 16 Oct 2023 07:13:07 +0000 Subject: [PATCH 197/720] [BOLT][RISCV] Implement MCPlusBuilder::equals (#68989) This enables ICF for RISC-V. No tests are added by this commit as `bolt-icf.test` covers this case (only on a RISC-V host though). --- bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp index af7645f568471..b95d599bafb20 100644 --- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp @@ -31,6 +31,17 @@ class RISCVMCPlusBuilder : public MCPlusBuilder { public: using MCPlusBuilder::MCPlusBuilder; + bool equals(const MCTargetExpr &A, const MCTargetExpr &B, + CompFuncTy Comp) const override { + const auto &RISCVExprA = cast(A); + const auto &RISCVExprB = cast(B); + if (RISCVExprA.getKind() != RISCVExprB.getKind()) + return false; + + return MCPlusBuilder::equals(*RISCVExprA.getSubExpr(), + *RISCVExprB.getSubExpr(), Comp); + } + bool shouldRecordCodeRelocation(uint64_t RelType) const override { switch (RelType) { case ELF::R_RISCV_JAL: From 2371d0ab263c164be820f961095cc22076566d12 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 16 Oct 2023 09:55:02 +0200 Subject: [PATCH 198/720] [DebugInfo] Only call upgradeCULocals() at module level (#68965) Loading a 2GB bitcode file, I noticed that we spend minutes just running upgradeCULocals(). 
Apparently it gets invoked every time a metadata block is loaded, which will be once at the module level and then once per function. However, the relevant metadata only exists at the module level, so running this upgrade per function is unnecessary. --- llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 1e9ed5fcaa581..4aaaea7ffeed4 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -705,10 +705,11 @@ class MetadataLoader::MetadataLoaderImpl { return Error::success(); } - void upgradeDebugInfo() { + void upgradeDebugInfo(bool ModuleLevel) { upgradeCUSubprograms(); upgradeCUVariables(); - upgradeCULocals(); + if (ModuleLevel) + upgradeCULocals(); } void callMDTypeCallback(Metadata **Val, unsigned TypeID); @@ -1085,7 +1086,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) { // Reading the named metadata created forward references and/or // placeholders, that we flush here. resolveForwardRefsAndPlaceholders(Placeholders); - upgradeDebugInfo(); + upgradeDebugInfo(ModuleLevel); // Return at the beginning of the block, since it is easy to skip it // entirely from there. Stream.ReadBlockEnd(); // Pop the abbrev block context. @@ -1116,7 +1117,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) { return error("Malformed block"); case BitstreamEntry::EndBlock: resolveForwardRefsAndPlaceholders(Placeholders); - upgradeDebugInfo(); + upgradeDebugInfo(ModuleLevel); return Error::success(); case BitstreamEntry::Record: // The interesting case. 
From 8592241e29e29f0e7e407e0989489c6e70c91c42 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Mon, 16 Oct 2023 08:11:35 +0000 Subject: [PATCH 199/720] [BOLT] Fix reorder data test for RISC-V (#68996) On RISC-V, small data objects are put in the `.sdata` section by default. This causes the `reorder-data-writable-ptload.c` test to fail since it hard-codes the section to optimize to `.data`. This patch passes the `-fPIC -pie` flags to clang to ensure the objects are added to `.data` on RISC-V. --- bolt/test/reorder-data-writable-ptload.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bolt/test/reorder-data-writable-ptload.c b/bolt/test/reorder-data-writable-ptload.c index 7b384e9655a32..fa9918779463a 100644 --- a/bolt/test/reorder-data-writable-ptload.c +++ b/bolt/test/reorder-data-writable-ptload.c @@ -1,7 +1,9 @@ // This test checks that reorder-data pass puts new hot .data section // to the writable segment. -// RUN: %clang %cflags -O3 -nostdlib -Wl,-q %s -o %t.exe +// Use -fPIC -pie to prevent the globals being put in .sdata instead of .data on +// RISC-V. +// RUN: %clang %cflags -fPIC -pie -O3 -nostdlib -Wl,-q %s -o %t.exe // RUN: llvm-bolt %t.exe -o %t.bolt --reorder-data=".data" \ // RUN: -data %S/Inputs/reorder-data-writable-ptload.fdata // RUN: llvm-readelf -SlW %t.bolt | FileCheck %s From 0ddca87b794d92fc38114df537c87673770497ff Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Mon, 16 Oct 2023 16:27:15 +0800 Subject: [PATCH 200/720] [X86][FP16] Do not combine to ADDSUB if target doesn't support FP16 (#69109) Fix crash when build code with `-mattr=f16c,fma` or `-mattr=avx512vl`. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- .../X86/avx512fp16-combine-shuffle-fma.ll | 58 +++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 13684babb2385..66b6d8260b7c7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -40685,7 +40685,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, SDLoc dl(N); EVT VT = N->getValueType(0); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.isTypeLegal(VT)) + if (TLI.isTypeLegal(VT) && !isSoftF16(VT, Subtarget)) if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG)) return AddSub; diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll new file mode 100644 index 0000000000000..54ccc23840f99 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=f16c,fma | FileCheck %s --check-prefix=F16C +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=F16C +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 | FileCheck %s --check-prefix=FP16 + +define <2 x half> @foo(<2 x half> %0) "unsafe-fp-math"="true" nounwind { +; AVX2-LABEL: foo: +; AVX2: # %bb.0: +; AVX2-NEXT: subq $40, %rsp +; AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: callq __truncsfhf2@PLT +; 
AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: addq $40, %rsp +; AVX2-NEXT: retq +; +; F16C-LABEL: foo: +; F16C: # %bb.0: +; F16C-NEXT: vpsrld $16, %xmm0, %xmm1 +; F16C-NEXT: vcvtph2ps %xmm1, %ymm1 +; F16C-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; F16C-NEXT: vcvtps2ph $4, %ymm1, %xmm1 +; F16C-NEXT: vcvtph2ps %xmm0, %ymm0 +; F16C-NEXT: vcvtph2ps %xmm1, %ymm1 +; F16C-NEXT: vsubps %ymm0, %ymm1, %ymm2 +; F16C-NEXT: vcvtps2ph $4, %ymm2, %xmm2 +; F16C-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; F16C-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7] +; F16C-NEXT: vzeroupper +; F16C-NEXT: retq +; +; FP16-LABEL: foo: +; FP16: # %bb.0: +; FP16-NEXT: vpsrld $16, %xmm0, %xmm1 +; FP16-NEXT: vfmaddsub231ph {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; FP16-NEXT: retq + %2 = shufflevector <2 x half> %0, <2 x half> undef, <2 x i32> + %3 = fmul fast <2 x half> %2, + %4 = fsub fast <2 x half> %3, %0 + %5 = fadd fast <2 x half> %3, %0 + %6 = shufflevector <2 x half> %4, <2 x half> %5, <2 x i32> + %7 = fadd fast <2 x half> %6, zeroinitializer + %8 = shufflevector <2 x half> undef, <2 x half> %7, <2 x i32> + %9 = fsub fast <2 x half> %8, zeroinitializer + ret <2 x half> %9 +} From d8de38b4010f4ea57fcdb45ba2be726f55b0c516 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Mon, 16 Oct 2023 08:29:28 +0000 Subject: [PATCH 201/720] [BOLT][RISCV] Handle EH_LABEL operands (#68998) Fixes the `runtime/exceptions-no-pie.cpp` test on RISC-V. 
--- bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp index b95d599bafb20..64bd318e06e87 100644 --- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp @@ -303,6 +303,7 @@ class RISCVMCPlusBuilder : public MCPlusBuilder { default: return false; case RISCV::C_J: + case TargetOpcode::EH_LABEL: OpNum = 0; return true; case RISCV::AUIPC: From c67b86280ec93f88cc7c7756617d305039e4c874 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Mon, 16 Oct 2023 08:29:49 +0000 Subject: [PATCH 202/720] [BOLT][RISCV] Don't create function entry points for unnamed symbols (#68977) Unnamed symbols are used, for example, for debug info related relocations on RISC-V. --- bolt/lib/Rewrite/RewriteInstance.cpp | 6 ++++++ bolt/test/RISCV/unnamed-sym-no-entry.c | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 bolt/test/RISCV/unnamed-sym-no-entry.c diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index ddcc21878abb8..b3de3b96b3ab8 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1582,6 +1582,12 @@ void RewriteInstance::adjustFunctionBoundaries() { if (!Function.isSymbolValidInScope(Symbol, SymbolSize)) break; + // Ignore unnamed symbols. Used, for example, by debugging info on RISC-V. + if (BC->isRISCV() && cantFail(Symbol.getName()).empty()) { + ++NextSymRefI; + continue; + } + // Skip basic block labels. This happens on RISC-V with linker relaxation // enabled because every branch needs a relocation and corresponding // symbol. We don't want to add such symbols as entry points. 
diff --git a/bolt/test/RISCV/unnamed-sym-no-entry.c b/bolt/test/RISCV/unnamed-sym-no-entry.c new file mode 100644 index 0000000000000..605bbc00aeec4 --- /dev/null +++ b/bolt/test/RISCV/unnamed-sym-no-entry.c @@ -0,0 +1,18 @@ +/// Verify that unnamed symbols are not added as function entry points. Such +/// symbols are used by relocations in debugging sections. + +// clang-format off + +// RUN: %clang %cflags -g -Wl,-q -o %t %s + +/// Verify that the binary indeed contains an unnamed symbol at _start +// RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=CHECK-ELF +// CHECK-ELF-DAG: [[#%x,START:]] {{.*}} FUNC GLOBAL DEFAULT [[#%d,SECTION:]] _start{{$}} +// CHECK-ELF-DAG: [[#%x,START]] {{.*}} NOTYPE LOCAL DEFAULT [[#SECTION]] {{$}} + +/// Verify that BOLT did not create an extra entry point for the unnamed symbol +// RUN: llvm-bolt -o %t.bolt %t --print-cfg | FileCheck %s +// CHECK: Binary Function "_start" after building cfg { +// CHECK: IsMultiEntry: 0 + +void _start() {} From 5857fec27fe8ee5a48a2ee48a4d79a9e39b0332b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= <1.int32@gmail.com> Date: Mon, 16 Oct 2023 10:31:01 +0200 Subject: [PATCH 203/720] [clang][ASTImporter] Fix of possible crash "Did not find base!". (#67680) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A problem with AST import could lead to multiple instances of the same template class specialization, with different template arguments. The difference was caused by pointers to different declarations of the same function. Problem is fixed by using the canonical declaration at import. 
Co-authored-by: Balázs Kéri --- clang/lib/AST/ASTImporter.cpp | 3 +- clang/unittests/AST/ASTImporterTest.cpp | 58 +++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 3adbabdb7fb87..628a2b2bbca39 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -811,7 +811,8 @@ ASTNodeImporter::import(const TemplateArgument &From) { ExpectedType ToTypeOrErr = import(From.getParamTypeForDecl()); if (!ToTypeOrErr) return ToTypeOrErr.takeError(); - return TemplateArgument(*ToOrErr, *ToTypeOrErr, From.getIsDefaulted()); + return TemplateArgument(dyn_cast((*ToOrErr)->getCanonicalDecl()), + *ToTypeOrErr, From.getIsDefaulted()); } case TemplateArgument::NullPtr: { diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 1dc314eafc4ef..f1f09a0be2b8d 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -9175,6 +9175,64 @@ TEST_P(ASTImporterOptionSpecificTestBase, EXPECT_TRUE(ToXType->typeMatchesDecl()); } +TEST_P(ASTImporterOptionSpecificTestBase, + ImportTemplateArgumentWithPointerToDifferentInstantiation) { + const char *CodeTo = + R"( + template + A f1() { + return A(); + } + template + class X {}; + + X> x; + )"; + const char *CodeFrom = + R"( + template + A f1(); + template + class X {}; + + X> x; + )"; + Decl *ToTU = getToTuDecl(CodeTo, Lang_CXX11); + Decl *FromTU = getTuDecl(CodeFrom, Lang_CXX11); + + auto *ToF1 = FirstDeclMatcher().match( + ToTU, functionDecl(hasName("f1"), isInstantiated())); + auto *FromF1 = FirstDeclMatcher().match( + FromTU, functionDecl(hasName("f1"), isInstantiated())); + EXPECT_TRUE(ToF1->isThisDeclarationADefinition()); + EXPECT_FALSE(FromF1->isThisDeclarationADefinition()); + + auto *ToX = FirstDeclMatcher().match( + ToTU, classTemplateSpecializationDecl(hasName("X"))); + auto *FromX = FirstDeclMatcher().match( + FromTU, 
classTemplateSpecializationDecl(hasName("X"))); + + Decl *ToTArgF = ToX->getTemplateArgs().get(1).getAsDecl(); + Decl *FromTArgF = FromX->getTemplateArgs().get(1).getAsDecl(); + EXPECT_EQ(ToTArgF, ToF1); + EXPECT_EQ(FromTArgF, FromF1); + + auto *ToXImported = Import(FromX, Lang_CXX11); + // The template argument 1 of 'X' in the "From" code points to a function + // that has no definition. The import must ensure that this template argument + // is imported in a way that it will point to the existing 'f1' function, not + // to the 'f1' that is imported. In this way when specialization of 'X' is + // imported it will have the same template arguments as the existing one. + EXPECT_EQ(ToXImported, ToX); + // FIXME: This matcher causes a crash "Tried to match orphan node". + // The code is removed until the problem is fixed. + // auto *ToF1Imported = + // LastDeclMatcher().match(ToTU, + // functionDecl(hasName("f1"),isInstantiated())); + // EXPECT_NE(ToF1Imported, ToF1); + // EXPECT_EQ(ToF1Imported->getPreviousDecl(), ToF1); +} + INSTANTIATE_TEST_SUITE_P(ParameterizedTests, ASTImporterLookupTableTest, DefaultTestValuesForRunOptions); From 3ab536fb994b9961e43a9ae07325c6fb0ff71cd5 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Mon, 16 Oct 2023 08:52:56 +0000 Subject: [PATCH 204/720] [BOLT][RISCV] Implement getCalleeSavedRegs (#69161) The main reason for implementing this now is to ensure the `assume=abi.test` test passes on RISC-V. Since it uses `--indirect-call-promotion=all`, it requires some support for register analysis on the target. Further testing and implementation of register/frame analysis on RISC-V will come later. 
--- bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp index 64bd318e06e87..85855fbf3ab97 100644 --- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp @@ -42,6 +42,22 @@ class RISCVMCPlusBuilder : public MCPlusBuilder { *RISCVExprB.getSubExpr(), Comp); } + void getCalleeSavedRegs(BitVector &Regs) const override { + Regs |= getAliases(RISCV::X2); + Regs |= getAliases(RISCV::X8); + Regs |= getAliases(RISCV::X9); + Regs |= getAliases(RISCV::X18); + Regs |= getAliases(RISCV::X19); + Regs |= getAliases(RISCV::X20); + Regs |= getAliases(RISCV::X21); + Regs |= getAliases(RISCV::X22); + Regs |= getAliases(RISCV::X23); + Regs |= getAliases(RISCV::X24); + Regs |= getAliases(RISCV::X25); + Regs |= getAliases(RISCV::X26); + Regs |= getAliases(RISCV::X27); + } + bool shouldRecordCodeRelocation(uint64_t RelType) const override { switch (RelType) { case ELF::R_RISCV_JAL: From c68bc1726c1c14a297c75cae597dab00e9e7e905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Spaits?= <48805437+spaits@users.noreply.github.com> Date: Mon, 16 Oct 2023 10:55:31 +0200 Subject: [PATCH 205/720] [analyzer] Fix note for member reference (#68691) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the following code: ```cpp int main() { struct Wrapper {char c; int &ref; }; Wrapper w = {.c = 'a', .ref = *(int *)0 }; w.ref = 1; } ``` The clang static analyzer will produce the following warnings and notes: ``` test.cpp:12:11: warning: Dereference of null pointer [core.NullDereference] 12 | w.ref = 1; | ~~~~~~^~~ test.cpp:11:5: note: 'w' initialized here 11 | Wrapper w = {.c = 'a', .ref = *(int *)0 }; | ^~~~~~~~~ test.cpp:12:11: note: Dereference of null pointer 12 | w.ref = 1; | ~~~~~~^~~ 1 warning generated. 
``` In the line where `w` is created, the note gives information about the initialization of `w` instead of `w.ref`. Let's compare it to a similar case where a null pointer dereference happens to a pointer member: ```cpp int main() { struct Wrapper {char c; int *ptr; }; Wrapper w = {.c = 'a', .ptr = nullptr }; *w.ptr = 1; } ``` Here the following error and notes are seen: ``` test.cpp:18:12: warning: Dereference of null pointer (loaded from field 'ptr') [core.NullDereference] 18 | *w.ptr = 1; | ~~~ ^ test.cpp:17:5: note: 'w.ptr' initialized to a null pointer value 17 | Wrapper w = {.c = 'a', .ptr = nullptr }; | ^~~~~~~~~ test.cpp:18:12: note: Dereference of null pointer (loaded from field 'ptr') 18 | *w.ptr = 1; | ~~~ ^ 1 warning generated. ``` Here the note that shows the initialization the initialization of `w.ptr` in shown instead of `w`. This commit is here to achieve similar notes for member reference as the notes of member pointers, so the report looks like the following: ``` test.cpp:12:11: warning: Dereference of null pointer [core.NullDereference] 12 | w.ref = 1; | ~~~~~~^~~ test.cpp:11:5: note: 'w.ref' initialized to a null pointer value 11 | Wrapper w = {.c = 'a', .ref = *(int *)0 }; | ^~~~~~~~~ test.cpp:12:11: note: Dereference of null pointer 12 | w.ref = 1; | ~~~~~~^~~ 1 warning generated. ``` Here the initialization of `w.ref` is shown instead of `w`. 
--------- Authored-by: Gábor Spaits Reviewed-by: Donát Nagy --- .../Core/BugReporterVisitors.cpp | 54 ++++++++++++++----- .../deref-track-symbolic-region.cpp | 31 +++++++++++ 2 files changed, 71 insertions(+), 14 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index 42d03f67510cf..2d184d5295132 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -132,6 +132,16 @@ const Expr *bugreporter::getDerefExpr(const Stmt *S) { } // Pattern match for a few useful cases: a[0], p->f, *p etc. else if (const auto *ME = dyn_cast(E)) { + // This handles the case when the dereferencing of a member reference + // happens. This is needed, because the AST for dereferencing a + // member reference looks like the following: + // |-MemberExpr + // `-DeclRefExpr + // Without this special case the notes would refer to the whole object + // (struct, class or union variable) instead of just the relevant member. 
+ + if (ME->getMemberDecl()->getType()->isReferenceType()) + break; E = ME->getBase(); } else if (const auto *IvarRef = dyn_cast(E)) { E = IvarRef->getBase(); @@ -157,26 +167,42 @@ const Expr *bugreporter::getDerefExpr(const Stmt *S) { return E; } +static const VarDecl *getVarDeclForExpression(const Expr *E) { + if (const auto *DR = dyn_cast(E)) + return dyn_cast(DR->getDecl()); + return nullptr; +} + static const MemRegion * getLocationRegionIfReference(const Expr *E, const ExplodedNode *N, bool LookingForReference = true) { - if (const auto *DR = dyn_cast(E)) { - if (const auto *VD = dyn_cast(DR->getDecl())) { - if (LookingForReference && !VD->getType()->isReferenceType()) - return nullptr; - return N->getState() - ->getLValue(VD, N->getLocationContext()) - .getAsRegion(); + if (const auto *ME = dyn_cast(E)) { + // This handles null references from FieldRegions, for example: + // struct Wrapper { int &ref; }; + // Wrapper w = { *(int *)0 }; + // w.ref = 1; + const Expr *Base = ME->getBase(); + const VarDecl *VD = getVarDeclForExpression(Base); + if (!VD) + return nullptr; + + const auto *FD = dyn_cast(ME->getMemberDecl()); + if (!FD) + return nullptr; + + if (FD->getType()->isReferenceType()) { + SVal StructSVal = N->getState()->getLValue(VD, N->getLocationContext()); + return N->getState()->getLValue(FD, StructSVal).getAsRegion(); } + return nullptr; } - // FIXME: This does not handle other kinds of null references, - // for example, references from FieldRegions: - // struct Wrapper { int &ref; }; - // Wrapper w = { *(int *)0 }; - // w.ref = 1; - - return nullptr; + const VarDecl *VD = getVarDeclForExpression(E); + if (!VD) + return nullptr; + if (LookingForReference && !VD->getType()->isReferenceType()) + return nullptr; + return N->getState()->getLValue(VD, N->getLocationContext()).getAsRegion(); } /// Comparing internal representations of symbolic values (via diff --git a/clang/test/Analysis/diagnostics/deref-track-symbolic-region.cpp 
b/clang/test/Analysis/diagnostics/deref-track-symbolic-region.cpp index e258a60aa966a..e9f62c2407e88 100644 --- a/clang/test/Analysis/diagnostics/deref-track-symbolic-region.cpp +++ b/clang/test/Analysis/diagnostics/deref-track-symbolic-region.cpp @@ -41,3 +41,34 @@ int testRefToNullPtr2() { return *p2; //expected-warning {{Dereference of null pointer}} // expected-note@-1{{Dereference of null pointer}} } + +void testMemberNullPointerDeref() { + struct Wrapper {char c; int *ptr; }; + Wrapper w = {'a', nullptr}; // expected-note {{'w.ptr' initialized to a null pointer value}} + *w.ptr = 1; //expected-warning {{Dereference of null pointer}} + // expected-note@-1{{Dereference of null pointer}} +} + +void testMemberNullReferenceDeref() { + struct Wrapper {char c; int &ref; }; + Wrapper w = {.c = 'a', .ref = *(int *)0 }; // expected-note {{'w.ref' initialized to a null pointer value}} + // expected-warning@-1 {{binding dereferenced null pointer to reference has undefined behavior}} + w.ref = 1; //expected-warning {{Dereference of null pointer}} + // expected-note@-1{{Dereference of null pointer}} +} + +void testReferenceToPointerWithNullptr() { + int *i = nullptr; // expected-note {{'i' initialized to a null pointer value}} + struct Wrapper {char c; int *&a;}; + Wrapper w {'c', i}; // expected-note{{'w.a' initialized here}} + *(w.a) = 25; // expected-warning {{Dereference of null pointer}} + // expected-note@-1 {{Dereference of null pointer}} +} + +void testNullReferenceToPointer() { + struct Wrapper {char c; int *&a;}; + Wrapper w {'c', *(int **)0 }; // expected-note{{'w.a' initialized to a null pointer value}} + // expected-warning@-1 {{binding dereferenced null pointer to reference has undefined behavior}} + w.a = nullptr; // expected-warning {{Dereference of null pointer}} + // expected-note@-1 {{Dereference of null pointer}} +} \ No newline at end of file From 8e53abc0412ff9d4f2be15fdc24b7d8e377d1b62 Mon Sep 17 00:00:00 2001 From: Mikhail Goncharov Date: Mon, 16 
Oct 2023 11:22:58 +0200 Subject: [PATCH 206/720] [ci] pull main branch before diffing (#68983) we tried to generate a full diff against main in ec9d80e but it resulted in wrong diffs. It seems that the issue was that 'main' was not updated after agent restart and diff main...HEAD kept growing. Not enabling diff main...HEAD just yet and will check logs for new PRs first. --- .ci/generate-buildkite-pipeline-premerge | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.ci/generate-buildkite-pipeline-premerge b/.ci/generate-buildkite-pipeline-premerge index e389df431505b..1028c08e20fcd 100755 --- a/.ci/generate-buildkite-pipeline-premerge +++ b/.ci/generate-buildkite-pipeline-premerge @@ -23,6 +23,16 @@ set -o pipefail # Environment variables script works with: # List of files affected by this commit : ${MODIFIED_FILES:=$(git diff --name-only HEAD~1)} +# Fetch origin/main to have an up to date merge base for main...HEAD diff. +git fetch origin main:main +echo "files modified HEAD~1" >&2 +git --no-pager diff --name-only HEAD~1 >&2 +echo "files modified main...HEAD" >&2 +git --no-pager diff --name-only main...HEAD | head -n 10 >&2 +merge_base=$(git merge-base main HEAD) +echo "merge base with main $merge_base" >&2 +echo "git log" >&2 +git --no-pager log --oneline --abbrev-commit -n 5 >&2 # Filter rules for generic windows tests : ${WINDOWS_AGENTS:='{"queue": "windows"}'} # Filter rules for generic linux tests From c0a7dd49118b6cef9f3e8ec8c0b5459968b92fd0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 16 Oct 2023 10:51:01 +0100 Subject: [PATCH 207/720] Fix MSVC "not all control paths return a value" warnings. NFC. 
--- llvm/lib/Target/AArch64/AArch64PointerAuth.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp index f9b3027c35bb3..5d11f0d22574c 100644 --- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp +++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp @@ -231,6 +231,7 @@ MachineBasicBlock &llvm::AArch64PAuth::checkAuthenticatedRegister( .addMBB(BreakBlock); return *SuccessBlock; } + llvm_unreachable("Unknown AuthCheckMethod enum"); } unsigned llvm::AArch64PAuth::getCheckerSizeInBytes(AuthCheckMethod Method) { @@ -244,6 +245,7 @@ unsigned llvm::AArch64PAuth::getCheckerSizeInBytes(AuthCheckMethod Method) { case AuthCheckMethod::XPACHint: return 20; } + llvm_unreachable("Unknown AuthCheckMethod enum"); } bool AArch64PointerAuth::checkAuthenticatedLR( From d86047cb665ecdb37d17fc83bae2f67d3a6455c4 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Mon, 16 Oct 2023 10:55:30 +0100 Subject: [PATCH 208/720] [mlir][ArmSME] Update tile slice layout syntax (#69151) This patch prefixes tile slice layout with `layout` in the assemblyFormat: - `` -> `layout` - `` -> `layout` The reason for this change is the current format doesn't play nicely with additional optional operands, required to support padding and masking (#69148), as it becomes ambiguous. 
This affects the the following ops: - arm_sme.tile_load - arm_sme.tile_store - arm_sme.load_tile_slice - arm_sme.store_tile_slice --- .../mlir/Dialect/ArmSME/IR/ArmSMEOps.td | 39 ++--- .../Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp | 4 +- .../VectorToArmSME/VectorToArmSME.cpp | 6 +- .../ArmSMEToSCF/arm-sme-to-scf.mlir | 8 +- mlir/test/Dialect/ArmSME/arm-sme-to-llvm.mlir | 36 ++--- mlir/test/Dialect/ArmSME/roundtrip.mlir | 152 +++++++++--------- .../Dialect/ArmSME/vector-ops-to-sme.mlir | 36 ++--- .../Vector/CPU/ArmSME/test-load-vertical.mlir | 2 +- 8 files changed, 139 insertions(+), 144 deletions(-) diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td index 049c9759d70bf..dab54b63d8d22 100644 --- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td +++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td @@ -76,6 +76,7 @@ def TileSliceLayout : I32EnumAttr<"TileSliceLayout", "Layout of a tile slice", [ def ArmSME_TileSliceLayoutAttr : EnumAttr { let assemblyFormat = "`<` $value `>`"; + let defaultValue = "TileSliceLayout::Horizontal"; } //===----------------------------------------------------------------------===// @@ -248,19 +249,18 @@ def TileLoadOp : ArmSME_Op<"tile_load"> { Example 2: Load a FP 32-bit element ZA tile with vertical layout from memory. ```mlir - %tile = arm_sme.tile_load %base[%c0, %c0], : memref, vector<[4]x[4]xf32> + %tile = arm_sme.tile_load %base[%c0, %c0] layout : memref, vector<[4]x[4]xf32> ``` Example 3: Load a 128-bit element ZA tile with horizontal layout (default) from memory. 
```mlir - %tile = arm_sme.tile_load %base[%c0, %c0], : memref, vector<[1]x[1]xi128> + %tile = arm_sme.tile_load %base[%c0, %c0] layout : memref, vector<[1]x[1]xi128> ``` }]; let arguments = (ins Arg:$base, Variadic:$indices, - DefaultValuedAttr:$layout + ArmSME_TileSliceLayoutAttr:$layout ); let results = (outs SMETile:$result); @@ -274,7 +274,7 @@ def TileLoadOp : ArmSME_Op<"tile_load"> { }]; let assemblyFormat = - "$base `[` $indices `]` (`,` $layout^)? attr-dict " + "$base `[` $indices `]` (`layout` `` $layout^)? attr-dict " "`:` type($base) `,` type($result)"; } @@ -296,19 +296,17 @@ def TileStoreOp : ArmSME_Op<"tile_store"> { Example 2: Store a FP 32-bit element ZA tile with vertical layout to memory. ```mlir - arm_sme.tile_store %tile, %base[%c0, %c0], : vector<[4]x[4]xf32>, memref + arm_sme.tile_store %tile, %base[%c0, %c0] layout : vector<[4]x[4]xf32>, memref ``` Example 3: Store a 128-bit element ZA tile with horizontal (default) layout to memory. ```mlir - arm_sme.tile_store %tile, %base[%c0, %c0], : vector<[1]x[1]xi128>, memref + arm_sme.tile_store %tile, %base[%c0, %c0] layout : vector<[1]x[1]xi128>, memref ``` }]; let arguments = (ins SMETile:$valueToStore, Arg:$base, - Variadic:$indices, - DefaultValuedAttr:$layout + Variadic:$indices, ArmSME_TileSliceLayoutAttr:$layout ); let extraClassDeclaration = [{ MemRefType getMemRefType() { @@ -320,7 +318,7 @@ def TileStoreOp : ArmSME_Op<"tile_store"> { }]; let assemblyFormat = - "$valueToStore `,` $base `[` $indices `]` (`,` $layout^)? attr-dict " + "$valueToStore `,` $base `[` $indices `]` (`layout` `` $layout^)? attr-dict " "`:` type($base) `,` type($valueToStore)"; } @@ -348,19 +346,18 @@ def LoadTileSliceOp : ArmSME_Op<"load_tile_slice", [ Example 2: Load a vector<[4]xf32> tile slice from memory into tile vertically at given index. 
```mlir - %tile_update = arm_sme.load_tile_slice %base[%c0], %tile, %tile_slice_index, : memref, vector<[4]x[4]xf32> + %tile_update = arm_sme.load_tile_slice %base[%c0], %tile, %tile_slice_index layout : memref, vector<[4]x[4]xf32> ``` Example 3: Load a vector<[1]xi128> tile slice from memory into tile vertically at given index. ```mlir - %tile_update = arm_sme.load_tile_slice %base[%c0], %tile, %tile_slice_index, : memref, vector<[1]x[1]xi128> + %tile_update = arm_sme.load_tile_slice %base[%c0], %tile, %tile_slice_index layout : memref, vector<[1]x[1]xi128> ``` }]; let arguments = (ins Arg:$base, SMETile:$tile, Variadic:$indices, Index:$tile_slice_index, - DefaultValuedAttr:$layout + ArmSME_TileSliceLayoutAttr:$layout ); let results = (outs SMETile:$result); @@ -374,7 +371,7 @@ def LoadTileSliceOp : ArmSME_Op<"load_tile_slice", [ }]; let assemblyFormat = [{ - $base `[` $indices `]` `,` $tile `,` $tile_slice_index (`,` $layout^)? + $base `[` $indices `]` `,` $tile `,` $tile_slice_index (`layout` `` $layout^)? attr-dict `:` type($base) `,` type($result) }]; } @@ -401,19 +398,17 @@ def StoreTileSliceOp : ArmSME_Op<"store_tile_slice"> { Example 2: Store vector<[4]xf32> vertical tile slice from tile at given index to memory. ```mlir - arm_sme.store_tile_slice %tile, %tile_slice_index, %base[%c0], : vector<[4]x[4]xf32>, memref + arm_sme.store_tile_slice %tile, %tile_slice_index, %base[%c0] layout : vector<[4]x[4]xf32>, memref ``` Example 3: Store a vector<[1]xi128> vertical tile slice from tile at given index to memory. 
```mlir - arm_sme.store_tile_slice %tile, %tile_slice_index, %base[%c0], : vector<[1]x[1]xi128>, memref + arm_sme.store_tile_slice %tile, %tile_slice_index, %base[%c0] layout : vector<[1]x[1]xi128>, memref ``` }]; let arguments = (ins SMETile:$tile, Index:$tile_slice_index, Arg:$base, - Variadic:$indices, - DefaultValuedAttr:$layout + Variadic:$indices, ArmSME_TileSliceLayoutAttr:$layout ); let extraClassDeclaration = [{ MemRefType getMemRefType() { @@ -425,7 +420,7 @@ def StoreTileSliceOp : ArmSME_Op<"store_tile_slice"> { }]; let assemblyFormat = [{ - $tile `,` $tile_slice_index `,` $base `[` $indices `]` (`,` $layout^)? + $tile `,` $tile_slice_index `,` $base `[` $indices `]` (`layout` `` $layout^)? attr-dict `:` type($base) `,` type($tile) }]; } diff --git a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp index 881cc8575fb48..0ec51b7430c02 100644 --- a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp +++ b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp @@ -134,7 +134,7 @@ struct TileLoadOpConversion : public OpRewritePattern { /// /// BEFORE: /// ```mlir -/// arm_sme.tile_store %tile, %dest[%c0, %c0], +/// arm_sme.tile_store %tile, %dest[%c0, %c0] layout /// : memref, vector<[4]x[4]xi32 /// ``` /// @@ -147,7 +147,7 @@ struct TileLoadOpConversion : public OpRewritePattern { /// %svl_s = arith.muli %min_svl_s, %vscale : index /// scf.for %tile_slice_idx = %c0 to %svl_s step %c1 { /// arm_sme.store_tile_slice %tile, %tile_slice_idx, %dest[%tile_slice_idx], -/// : memref, vector<[4]x[4]xi32> +/// layout : memref, vector<[4]x[4]xi32> /// } /// ``` struct TileStoreOpConversion : public OpRewritePattern { diff --git a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp index cbc5e468c7293..d06eb4f5b01c9 100644 --- a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp +++ b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp @@ -67,7 +67,7 @@ namespace { /// 
/// is converted to: /// -/// arm_sme.tile_load ... +/// arm_sme.tile_load ... layout struct TransferReadPermutationToArmSMELowering : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -368,8 +368,8 @@ struct SplatOpToArmSMELowering : public OpRewritePattern { /// %alloca = memref.alloca(%svl_s, %svl_s) : memref /// %arm_sme.tile_store %src, , %alloca[%c0, %c0] /// : memref, vector<[4]x[4]xi32> -/// %transposed_src = arm_sme.tile_load %alloca[%c0, %c0], -/// : memref, vector<[4]x[4]xi32> +/// %transposed_src = arm_sme.tile_load %alloca[%c0, %c0] +/// layout : memref, vector<[4]x[4]xi32> /// /// NOTE: Tranposing via memory is obviously expensive, the current intention /// is to avoid the transpose if possible, this is therefore intended as a diff --git a/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir b/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir index 09f148bcd42f5..4b3020970d6cc 100644 --- a/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir +++ b/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir @@ -21,10 +21,10 @@ func.func @arm_sme_tile_load_hor(%src : memref) { // ----- // CHECK-LABEL: @arm_sme_tile_load_ver -// CHECK: arm_sme.load_tile_slice {{.*}} +// CHECK: arm_sme.load_tile_slice {{.*}} layout func.func @arm_sme_tile_load_ver(%src : memref) { %c0 = arith.constant 0 : index - %tile = arm_sme.tile_load %src[%c0, %c0], : memref, vector<[4]x[4]xi32> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[4]x[4]xi32> return } @@ -50,10 +50,10 @@ func.func @arm_sme_tile_store_hor(%tile : vector<[4]x[4]xi32>, %dest : memref +// CHECK: arm_sme.store_tile_slice {{.*}} layout func.func @arm_sme_tile_store_ver(%tile : vector<[4]x[4]xi32>, %dest : memref) { %c0 = arith.constant 0 : index - arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[4]x[4]xi32> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[4]x[4]xi32> return } diff --git a/mlir/test/Dialect/ArmSME/arm-sme-to-llvm.mlir 
b/mlir/test/Dialect/ArmSME/arm-sme-to-llvm.mlir index 4c16e5c488a74..07485b3ee8ddf 100644 --- a/mlir/test/Dialect/ArmSME/arm-sme-to-llvm.mlir +++ b/mlir/test/Dialect/ArmSME/arm-sme-to-llvm.mlir @@ -116,7 +116,7 @@ func.func @arm_sme_load_tile_slice_hor_f64(%src : memref, %tile : vecto // CHECK: "arm_sme.intr.ld1b.vert"({{.*}}) : (vector<[16]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_load_tile_slice_ver_i8(%src : memref, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) { %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[16]x[16]xi8> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[16]x[16]xi8> return } @@ -126,7 +126,7 @@ func.func @arm_sme_load_tile_slice_ver_i8(%src : memref, %tile : vector< // CHECK: "arm_sme.intr.ld1h.vert"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_load_tile_slice_ver_i16(%src : memref, %tile : vector<[8]x[8]xi16>, %tile_slice_index : index) { %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[8]x[8]xi16> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[8]x[8]xi16> return } @@ -136,7 +136,7 @@ func.func @arm_sme_load_tile_slice_ver_i16(%src : memref, %tile : vecto // CHECK: "arm_sme.intr.ld1w.vert"({{.*}}) : (vector<[4]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_load_tile_slice_ver_i32(%src : memref, %tile : vector<[4]x[4]xi32>, %tile_slice_index : index) { %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[4]x[4]xi32> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[4]x[4]xi32> return } @@ -146,7 +146,7 @@ func.func @arm_sme_load_tile_slice_ver_i32(%src : memref, %tile : vecto // CHECK: 
"arm_sme.intr.ld1d.vert"({{.*}}) : (vector<[2]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_load_tile_slice_ver_i64(%src : memref, %tile : vector<[2]x[2]xi64>, %tile_slice_index : index) { %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[2]x[2]xi64> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[2]x[2]xi64> return } @@ -156,7 +156,7 @@ func.func @arm_sme_load_tile_slice_ver_i64(%src : memref, %tile : vecto // CHECK: "arm_sme.intr.ld1q.vert"({{.*}}) : (vector<[1]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_load_tile_slice_ver_i128(%src : memref, %tile : vector<[1]x[1]xi128>, %tile_slice_index : index) { %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[1]x[1]xi128> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[1]x[1]xi128> return } @@ -166,7 +166,7 @@ func.func @arm_sme_load_tile_slice_ver_i128(%src : memref, %tile : vec // CHECK: "arm_sme.intr.ld1h.vert"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_load_tile_slice_ver_f16(%src : memref, %tile : vector<[8]x[8]xf16>, %tile_slice_index : index) { %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[8]x[8]xf16> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[8]x[8]xf16> return } @@ -176,7 +176,7 @@ func.func @arm_sme_load_tile_slice_ver_f16(%src : memref, %tile : vecto // CHECK: "arm_sme.intr.ld1h.vert"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_load_tile_slice_ver_bf16(%src : memref, %tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) { %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, 
vector<[8]x[8]xbf16> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[8]x[8]xbf16> return } @@ -186,7 +186,7 @@ func.func @arm_sme_load_tile_slice_ver_bf16(%src : memref, %tile : vec // CHECK: "arm_sme.intr.ld1w.vert"({{.*}}) : (vector<[4]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_load_tile_slice_ver_f32(%src : memref, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) { %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[4]x[4]xf32> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[4]x[4]xf32> return } @@ -196,7 +196,7 @@ func.func @arm_sme_load_tile_slice_ver_f32(%src : memref, %tile : vecto // CHECK: "arm_sme.intr.ld1d.vert"({{.*}}) : (vector<[2]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_load_tile_slice_ver_f64(%src : memref, %tile : vector<[2]x[2]xf64>, %tile_slice_index : index) { %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[2]x[2]xf64> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[2]x[2]xf64> return } @@ -316,7 +316,7 @@ func.func @arm_sme_store_tile_slice_hor_f64(%tile : vector<[2]x[2]xf64>, %tile_s // CHECK: "arm_sme.intr.st1b.vert"({{.*}}) : (vector<[16]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_store_tile_slice_ver_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[16]x[16]xi8> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[16]x[16]xi8> return } @@ -326,7 +326,7 @@ func.func @arm_sme_store_tile_slice_ver_i8(%tile : vector<[16]x[16]xi8>, %tile_s // CHECK: "arm_sme.intr.st1h.vert"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) 
-> () func.func @arm_sme_store_tile_slice_ver_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[8]x[8]xi16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xi16> return } @@ -336,7 +336,7 @@ func.func @arm_sme_store_tile_slice_ver_i16(%tile : vector<[8]x[8]xi16>, %tile_s // CHECK: "arm_sme.intr.st1w.vert"({{.*}}) : (vector<[4]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_store_tile_slice_ver_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[4]x[4]xi32> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[4]x[4]xi32> return } @@ -346,7 +346,7 @@ func.func @arm_sme_store_tile_slice_ver_i32(%tile : vector<[4]x[4]xi32>, %tile_s // CHECK: "arm_sme.intr.st1d.vert"({{.*}}) : (vector<[2]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_store_tile_slice_ver_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[2]x[2]xi64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[2]x[2]xi64> return } @@ -356,7 +356,7 @@ func.func @arm_sme_store_tile_slice_ver_i64(%tile : vector<[2]x[2]xi64>, %tile_s // CHECK: "arm_sme.intr.st1q.vert"({{.*}}) : (vector<[1]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_store_tile_slice_ver_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[1]x[1]xi128> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, 
vector<[1]x[1]xi128> return } @@ -366,7 +366,7 @@ func.func @arm_sme_store_tile_slice_ver_i128(%tile : vector<[1]x[1]xi128>, %tile // CHECK: "arm_sme.intr.st1h.vert"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_store_tile_slice_ver_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[8]x[8]xf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xf16> return } @@ -376,7 +376,7 @@ func.func @arm_sme_store_tile_slice_ver_f16(%tile : vector<[8]x[8]xf16>, %tile_s // CHECK: "arm_sme.intr.st1h.vert"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_store_tile_slice_ver_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[8]x[8]xbf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xbf16> return } @@ -386,7 +386,7 @@ func.func @arm_sme_store_tile_slice_ver_bf16(%tile : vector<[8]x[8]xbf16>, %tile // CHECK: "arm_sme.intr.st1w.vert"({{.*}}) : (vector<[4]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_store_tile_slice_ver_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[4]x[4]xf32> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[4]x[4]xf32> return } @@ -396,7 +396,7 @@ func.func @arm_sme_store_tile_slice_ver_f32(%tile : vector<[4]x[4]xf32>, %tile_s // CHECK: "arm_sme.intr.st1d.vert"({{.*}}) : (vector<[2]xi1>, !llvm.ptr, i32, i32) -> () func.func @arm_sme_store_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %dest : memref) -> () { 
%c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[2]x[2]xf64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[2]x[2]xf64> return } diff --git a/mlir/test/Dialect/ArmSME/roundtrip.mlir b/mlir/test/Dialect/ArmSME/roundtrip.mlir index f6d19359b8e3a..427154158e797 100644 --- a/mlir/test/Dialect/ArmSME/roundtrip.mlir +++ b/mlir/test/Dialect/ArmSME/roundtrip.mlir @@ -358,81 +358,81 @@ func.func @arm_sme_tile_load_hor_f64(%src : memref) { // ----- func.func @arm_sme_tile_load_ver_i8(%src : memref) { - // CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[16]x[16]xi8> + // CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - %tile = arm_sme.tile_load %src[%c0, %c0], : memref, vector<[16]x[16]xi8> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[16]x[16]xi8> return } // ----- func.func @arm_sme_tile_load_ver_i16(%src : memref) { - // CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[8]x[8]xi16> + // CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[8]x[8]xi16> %c0 = arith.constant 0 : index - %tile = arm_sme.tile_load %src[%c0, %c0], : memref, vector<[8]x[8]xi16> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[8]x[8]xi16> return } // ----- func.func @arm_sme_tile_load_ver_i32(%src : memref) { - // CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[4]x[4]xi32> + // CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[4]x[4]xi32> %c0 = arith.constant 0 : index - %tile = arm_sme.tile_load %src[%c0, %c0], : memref, vector<[4]x[4]xi32> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[4]x[4]xi32> return } // ----- func.func @arm_sme_tile_load_ver_i64(%src : memref) { - // CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[2]x[2]xi64> + // CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[2]x[2]xi64> %c0 = arith.constant 0 : index - %tile = 
arm_sme.tile_load %src[%c0, %c0], : memref, vector<[2]x[2]xi64> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[2]x[2]xi64> return } // ----- func.func @arm_sme_tile_load_ver_i128(%src : memref) { - // CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[1]x[1]xi128> + // CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[1]x[1]xi128> %c0 = arith.constant 0 : index - %tile = arm_sme.tile_load %src[%c0, %c0], : memref, vector<[1]x[1]xi128> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[1]x[1]xi128> return } // ----- func.func @arm_sme_tile_load_ver_f16(%src : memref) { - // CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[8]x[8]xf16> + // CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[8]x[8]xf16> %c0 = arith.constant 0 : index - %tile = arm_sme.tile_load %src[%c0, %c0], : memref, vector<[8]x[8]xf16> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[8]x[8]xf16> return } // ----- func.func @arm_sme_tile_load_ver_bf16(%src : memref) { - // CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[8]x[8]xbf16> + // CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[8]x[8]xbf16> %c0 = arith.constant 0 : index - %tile = arm_sme.tile_load %src[%c0, %c0], : memref, vector<[8]x[8]xbf16> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[8]x[8]xbf16> return } // ----- func.func @arm_sme_tile_load_ver_f32(%src : memref) { - // CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[4]x[4]xf32> + // CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[4]x[4]xf32> %c0 = arith.constant 0 : index - %tile = arm_sme.tile_load %src[%c0, %c0], : memref, vector<[4]x[4]xf32> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[4]x[4]xf32> return } // ----- func.func @arm_sme_tile_load_ver_f64(%src : memref) { - // CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[2]x[2]xf64> + // CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[2]x[2]xf64> %c0 = arith.constant 0 : index - 
%tile = arm_sme.tile_load %src[%c0, %c0], : memref, vector<[2]x[2]xf64> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[2]x[2]xf64> return } @@ -442,7 +442,7 @@ func.func @arm_sme_tile_load_ver_f64(%src : memref) { func.func @arm_sme_tile_load_explicit_hor(%src : memref) { // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref, vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - %tile = arm_sme.tile_load %src[%c0, %c0], : memref, vector<[16]x[16]xi8> + %tile = arm_sme.tile_load %src[%c0, %c0] layout : memref, vector<[16]x[16]xi8> return } @@ -534,81 +534,81 @@ func.func @arm_sme_tile_store_hor_f64(%tile : vector<[2]x[2]xf64>, %dest : memre // ----- func.func @arm_sme_tile_store_ver_i8(%tile : vector<[16]x[16]xi8>, %dest : memref) { - // CHECK: arm_sme.tile_store {{.*}}, : memref, vector<[16]x[16]xi8> + // CHECK: arm_sme.tile_store {{.*}} layout : memref, vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[16]x[16]xi8> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[16]x[16]xi8> return } // ----- func.func @arm_sme_tile_store_ver_i16(%tile : vector<[8]x[8]xi16>, %dest : memref) { - // CHECK: arm_sme.tile_store {{.*}}, : memref, vector<[8]x[8]xi16> + // CHECK: arm_sme.tile_store {{.*}} layout : memref, vector<[8]x[8]xi16> %c0 = arith.constant 0 : index - arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[8]x[8]xi16> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[8]x[8]xi16> return } // ----- func.func @arm_sme_tile_store_ver_i32(%tile : vector<[4]x[4]xi32>, %dest : memref) { - // CHECK: arm_sme.tile_store {{.*}}, : memref, vector<[4]x[4]xi32> + // CHECK: arm_sme.tile_store {{.*}} layout : memref, vector<[4]x[4]xi32> %c0 = arith.constant 0 : index - arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[4]x[4]xi32> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[4]x[4]xi32> return } // ----- func.func 
@arm_sme_tile_store_ver_i64(%tile : vector<[2]x[2]xi64>, %dest : memref) { - // CHECK: arm_sme.tile_store {{.*}}, : memref, vector<[2]x[2]xi64> + // CHECK: arm_sme.tile_store {{.*}} layout : memref, vector<[2]x[2]xi64> %c0 = arith.constant 0 : index - arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[2]x[2]xi64> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[2]x[2]xi64> return } // ----- func.func @arm_sme_tile_store_ver_i128(%tile : vector<[1]x[1]xi128>, %dest : memref) { - // CHECK: arm_sme.tile_store {{.*}}, : memref, vector<[1]x[1]xi128> + // CHECK: arm_sme.tile_store {{.*}} layout : memref, vector<[1]x[1]xi128> %c0 = arith.constant 0 : index - arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[1]x[1]xi128> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[1]x[1]xi128> return } // ----- func.func @arm_sme_tile_store_ver_f16(%tile : vector<[8]x[8]xf16>, %dest : memref) { - // CHECK: arm_sme.tile_store {{.*}}, : memref, vector<[8]x[8]xf16> + // CHECK: arm_sme.tile_store {{.*}} layout : memref, vector<[8]x[8]xf16> %c0 = arith.constant 0 : index - arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[8]x[8]xf16> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[8]x[8]xf16> return } // ----- func.func @arm_sme_tile_store_ver_bf16(%tile : vector<[8]x[8]xbf16>, %dest : memref) { - // CHECK: arm_sme.tile_store {{.*}}, : memref, vector<[8]x[8]xbf16> + // CHECK: arm_sme.tile_store {{.*}} layout : memref, vector<[8]x[8]xbf16> %c0 = arith.constant 0 : index - arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[8]x[8]xbf16> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[8]x[8]xbf16> return } // ----- func.func @arm_sme_tile_store_ver_f32(%tile : vector<[4]x[4]xf32>, %dest : memref) { - // CHECK: arm_sme.tile_store {{.*}}, : memref, vector<[4]x[4]xf32> + // CHECK: arm_sme.tile_store {{.*}} layout : memref, vector<[4]x[4]xf32> %c0 = arith.constant 0 : index - 
arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[4]x[4]xf32> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[4]x[4]xf32> return } // ----- func.func @arm_sme_tile_store_ver_f64(%tile : vector<[2]x[2]xf64>, %dest : memref) { - // CHECK: arm_sme.tile_store {{.*}}, : memref, vector<[2]x[2]xf64> + // CHECK: arm_sme.tile_store {{.*}} layout : memref, vector<[2]x[2]xf64> %c0 = arith.constant 0 : index - arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[2]x[2]xf64> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[2]x[2]xf64> return } @@ -618,7 +618,7 @@ func.func @arm_sme_tile_store_ver_f64(%tile : vector<[2]x[2]xf64>, %dest : memre func.func @arm_sme_tile_store_ver_i8(%tile : vector<[16]x[16]xi8>, %dest : memref) { // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref, vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - arm_sme.tile_store %tile, %dest[%c0, %c0], : memref, vector<[16]x[16]xi8> + arm_sme.tile_store %tile, %dest[%c0, %c0] layout : memref, vector<[16]x[16]xi8> return } @@ -710,81 +710,81 @@ func.func @arm_sme_load_tile_slice_hor_f64(%src : memref, %tile : vecto // ----- func.func @arm_sme_load_tile_slice_ver_i8(%src : memref, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) { - // CHECK: arm_sme.load_tile_slice {{.*}}, : memref, vector<[16]x[16]xi8> + // CHECK: arm_sme.load_tile_slice {{.*}} layout : memref, vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[16]x[16]xi8> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[16]x[16]xi8> return } // ----- func.func @arm_sme_load_tile_slice_ver_i16(%src : memref, %tile : vector<[8]x[8]xi16>, %tile_slice_index : index) { - // CHECK: arm_sme.load_tile_slice {{.*}}, : memref, vector<[8]x[8]xi16> + // CHECK: arm_sme.load_tile_slice {{.*}} layout : memref, vector<[8]x[8]xi16> %c0 = arith.constant 0 : 
index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[8]x[8]xi16> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[8]x[8]xi16> return } // ----- func.func @arm_sme_load_tile_slice_ver_i32(%src : memref, %tile : vector<[4]x[4]xi32>, %tile_slice_index : index) { - // CHECK: arm_sme.load_tile_slice {{.*}}, : memref, vector<[4]x[4]xi32> + // CHECK: arm_sme.load_tile_slice {{.*}} layout : memref, vector<[4]x[4]xi32> %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[4]x[4]xi32> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[4]x[4]xi32> return } // ----- func.func @arm_sme_load_tile_slice_ver_i64(%src : memref, %tile : vector<[2]x[2]xi64>, %tile_slice_index : index) { - // CHECK: arm_sme.load_tile_slice {{.*}}, : memref, vector<[2]x[2]xi64> + // CHECK: arm_sme.load_tile_slice {{.*}} layout : memref, vector<[2]x[2]xi64> %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[2]x[2]xi64> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[2]x[2]xi64> return } // ----- func.func @arm_sme_load_tile_slice_ver_i128(%src : memref, %tile : vector<[1]x[1]xi128>, %tile_slice_index : index) { - // CHECK: arm_sme.load_tile_slice {{.*}}, : memref, vector<[1]x[1]xi128> + // CHECK: arm_sme.load_tile_slice {{.*}} layout : memref, vector<[1]x[1]xi128> %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[1]x[1]xi128> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[1]x[1]xi128> return } // ----- func.func @arm_sme_load_tile_slice_ver_f16(%src : memref, %tile : vector<[8]x[8]xf16>, %tile_slice_index : index) { - // 
CHECK: arm_sme.load_tile_slice {{.*}}, : memref, vector<[8]x[8]xf16> + // CHECK: arm_sme.load_tile_slice {{.*}} layout : memref, vector<[8]x[8]xf16> %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[8]x[8]xf16> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[8]x[8]xf16> return } // ----- func.func @arm_sme_load_tile_slice_ver_bf16(%src : memref, %tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) { - // CHECK: arm_sme.load_tile_slice {{.*}}, : memref, vector<[8]x[8]xbf16> + // CHECK: arm_sme.load_tile_slice {{.*}} layout : memref, vector<[8]x[8]xbf16> %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[8]x[8]xbf16> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[8]x[8]xbf16> return } // ----- func.func @arm_sme_load_tile_slice_ver_f32(%src : memref, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) { - // CHECK: arm_sme.load_tile_slice {{.*}}, : memref, vector<[4]x[4]xf32> + // CHECK: arm_sme.load_tile_slice {{.*}} layout : memref, vector<[4]x[4]xf32> %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[4]x[4]xf32> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[4]x[4]xf32> return } // ----- func.func @arm_sme_load_tile_slice_ver_f64(%src : memref, %tile : vector<[2]x[2]xf64>, %tile_slice_index : index) { - // CHECK: arm_sme.load_tile_slice {{.*}}, : memref, vector<[2]x[2]xf64> + // CHECK: arm_sme.load_tile_slice {{.*}} layout : memref, vector<[2]x[2]xf64> %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[2]x[2]xf64> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index 
layout : memref, vector<[2]x[2]xf64> return } @@ -794,7 +794,7 @@ func.func @arm_sme_load_tile_slice_ver_f64(%src : memref, %tile : vecto func.func @arm_sme_load_tile_slice_hor_i8(%src : memref, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) { // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref, vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index, : memref, vector<[16]x[16]xi8> + %tile_update = arm_sme.load_tile_slice %src[%c0], %tile, %tile_slice_index layout : memref, vector<[16]x[16]xi8> return } @@ -886,81 +886,81 @@ func.func @arm_sme_store_tile_slice_hor_f64(%tile : vector<[2]x[2]xf64>, %tile_s // ----- func.func @arm_sme_store_tile_slice_ver_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, : memref, vector<[16]x[16]xi8> + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[16]x[16]xi8> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[16]x[16]xi8> return } // ----- func.func @arm_sme_store_tile_slice_ver_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, : memref, vector<[8]x[8]xi16> + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[8]x[8]xi16> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[8]x[8]xi16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xi16> return } // ----- func.func @arm_sme_store_tile_slice_ver_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, : memref, vector<[4]x[4]xi32> + // CHECK: 
arm_sme.store_tile_slice {{.*}} layout : memref, vector<[4]x[4]xi32> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[4]x[4]xi32> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[4]x[4]xi32> return } // ----- func.func @arm_sme_store_tile_slice_ver_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, : memref, vector<[2]x[2]xi64> + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[2]x[2]xi64> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[2]x[2]xi64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[2]x[2]xi64> return } // ----- func.func @arm_sme_store_tile_slice_ver_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, : memref, vector<[1]x[1]xi128> + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[1]x[1]xi128> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[1]x[1]xi128> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[1]x[1]xi128> return } // ----- func.func @arm_sme_store_tile_slice_ver_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, : memref, vector<[8]x[8]xf16> + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[8]x[8]xf16> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[8]x[8]xf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xf16> return } // ----- func.func @arm_sme_store_tile_slice_ver_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, 
%dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, : memref, vector<[8]x[8]xbf16> + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[8]x[8]xbf16> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[8]x[8]xbf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xbf16> return } // ----- func.func @arm_sme_store_tile_slice_ver_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, : memref, vector<[4]x[4]xf32> + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[4]x[4]xf32> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[4]x[4]xf32> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[4]x[4]xf32> return } // ----- func.func @arm_sme_store_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, : memref, vector<[2]x[2]xf64> + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[2]x[2]xf64> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[2]x[2]xf64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[2]x[2]xf64> return } @@ -970,7 +970,7 @@ func.func @arm_sme_store_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_s func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %dest : memref) -> () { // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0], : memref, vector<[16]x[16]xi8> + arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : 
memref, vector<[16]x[16]xi8> return } diff --git a/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir b/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir index b2c8fd8e01ac7..455b47a83e28f 100644 --- a/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir +++ b/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir @@ -5,7 +5,7 @@ //===----------------------------------------------------------------------===// // CHECK-LABEL: @transfer_read_2d_transpose_i8 -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[16]x[16]xi8> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[16]x[16]xi8> func.func @transfer_read_2d_transpose_i8(%src : memref) { %c0 = arith.constant 0 : index %pad = arith.constant 0 : i8 @@ -17,7 +17,7 @@ func.func @transfer_read_2d_transpose_i8(%src : memref) { // ----- // CHECK-LABEL: @transfer_read_2d_transpose_i16 -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[8]x[8]xi16> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[8]x[8]xi16> func.func @transfer_read_2d_transpose_i16(%src : memref) { %c0 = arith.constant 0 : index %pad = arith.constant 0 : i16 @@ -29,7 +29,7 @@ func.func @transfer_read_2d_transpose_i16(%src : memref) { // ----- // CHECK-LABEL: @transfer_read_2d_transpose_i32 -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[4]x[4]xi32> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[4]x[4]xi32> func.func @transfer_read_2d_transpose_i32(%src : memref) { %c0 = arith.constant 0 : index %pad = arith.constant 0 : i32 @@ -41,7 +41,7 @@ func.func @transfer_read_2d_transpose_i32(%src : memref) { // ----- // CHECK-LABEL: @transfer_read_2d_transpose_i64 -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[2]x[2]xi64> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[2]x[2]xi64> func.func @transfer_read_2d_transpose_i64(%src : memref) { %c0 = arith.constant 0 : index %pad = arith.constant 0 : i64 @@ -53,7 +53,7 @@ func.func @transfer_read_2d_transpose_i64(%src : memref) { // ----- // CHECK-LABEL: 
@transfer_read_2d_transpose_i128 -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[1]x[1]xi128> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[1]x[1]xi128> func.func @transfer_read_2d_transpose_i128(%src : memref) { %c0 = arith.constant 0 : index %pad = arith.constant 0 : i128 @@ -65,7 +65,7 @@ func.func @transfer_read_2d_transpose_i128(%src : memref) { // ----- // CHECK-LABEL: @transfer_read_2d_transpose_f16 -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[8]x[8]xf16> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[8]x[8]xf16> func.func @transfer_read_2d_transpose_f16(%src : memref) { %c0 = arith.constant 0 : index %pad = arith.constant 0.0 : f16 @@ -77,7 +77,7 @@ func.func @transfer_read_2d_transpose_f16(%src : memref) { // ----- // CHECK-LABEL: @transfer_read_2d_transpose_bf16 -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[8]x[8]xbf16> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[8]x[8]xbf16> func.func @transfer_read_2d_transpose_bf16(%src : memref) { %c0 = arith.constant 0 : index %pad = arith.constant 0.0 : bf16 @@ -89,7 +89,7 @@ func.func @transfer_read_2d_transpose_bf16(%src : memref) { // ----- // CHECK-LABEL: @transfer_read_2d_transpose_f32 -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[4]x[4]xf32> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[4]x[4]xf32> func.func @transfer_read_2d_transpose_f32(%src : memref) { %c0 = arith.constant 0 : index %pad = arith.constant 0.0 : f32 @@ -101,7 +101,7 @@ func.func @transfer_read_2d_transpose_f32(%src : memref) { // ----- // CHECK-LABEL: @transfer_read_2d_transpose_f64 -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[2]x[2]xf64> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[2]x[2]xf64> func.func @transfer_read_2d_transpose_f64(%src : memref) { %c0 = arith.constant 0 : index %pad = arith.constant 0.0 : f64 @@ -475,7 +475,7 @@ func.func @splat_vec2d_from_f16(%arg0: f16) { // CHECK: %[[MIN_TILE_SLICES:.*]] = 
arith.muli %[[VSCALE]], %[[C16]] : index // CHECK: %[[NUM_TILE_SLICES:.*]] = memref.alloca(%[[MIN_TILE_SLICES]], %[[MIN_TILE_SLICES]]) : memref // CHECK: arm_sme.tile_store %[[TILE]], %[[NUM_TILE_SLICES]]{{\[}}%[[C0]], %[[C0]]] : memref, vector<[16]x[16]xi8> -// CHECK: arm_sme.tile_load %[[NUM_TILE_SLICES]]{{\[}}%[[C0]], %[[C0]]], : memref, vector<[16]x[16]xi8> +// CHECK: arm_sme.tile_load %[[NUM_TILE_SLICES]]{{\[}}%[[C0]], %[[C0]]] layout : memref, vector<[16]x[16]xi8> func.func @transpose_i8(%arg0: vector<[16]x[16]xi8>) { %0 = vector.transpose %arg0, [1, 0] : vector<[16]x[16]xi8> to vector<[16]x[16]xi8> "prevent.dce"(%0) : (vector<[16]x[16]xi8>) -> () @@ -487,7 +487,7 @@ func.func @transpose_i8(%arg0: vector<[16]x[16]xi8>) { // CHECK-LABEL: @transpose_i16 // CHECK: arith.constant 8 // CHECK: arm_sme.tile_store {{.*}} : memref, vector<[8]x[8]xi16> -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[8]x[8]xi16> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[8]x[8]xi16> func.func @transpose_i16(%arg0: vector<[8]x[8]xi16>) { %0 = vector.transpose %arg0, [1, 0] : vector<[8]x[8]xi16> to vector<[8]x[8]xi16> "prevent.dce"(%0) : (vector<[8]x[8]xi16>) -> () @@ -499,7 +499,7 @@ func.func @transpose_i16(%arg0: vector<[8]x[8]xi16>) { // CHECK-LABEL: @transpose_i32 // CHECK: arith.constant 4 // CHECK: arm_sme.tile_store {{.*}} : memref, vector<[4]x[4]xi32> -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[4]x[4]xi32> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[4]x[4]xi32> func.func @transpose_i32(%arg0: vector<[4]x[4]xi32>) { %0 = vector.transpose %arg0, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32> "prevent.dce"(%0) : (vector<[4]x[4]xi32>) -> () @@ -511,7 +511,7 @@ func.func @transpose_i32(%arg0: vector<[4]x[4]xi32>) { // CHECK-LABEL: @transpose_i64 // CHECK: arith.constant 2 // CHECK: arm_sme.tile_store {{.*}} : memref, vector<[2]x[2]xi64> -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[2]x[2]xi64> +// CHECK: 
arm_sme.tile_load {{.*}} layout : memref, vector<[2]x[2]xi64> func.func @transpose_i64(%arg0: vector<[2]x[2]xi64>) { %0 = vector.transpose %arg0, [1, 0] : vector<[2]x[2]xi64> to vector<[2]x[2]xi64> "prevent.dce"(%0) : (vector<[2]x[2]xi64>) -> () @@ -524,7 +524,7 @@ func.func @transpose_i64(%arg0: vector<[2]x[2]xi64>) { // CHECK: %[[VSCALE:.*]] = vector.vscale // CHECK: %[[NUM_TILE_SLICES:.*]] = memref.alloca(%[[VSCALE]], %[[VSCALE]]) : memref // CHECK: arm_sme.tile_store {{.*}} : memref, vector<[1]x[1]xi128> -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[1]x[1]xi128> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[1]x[1]xi128> func.func @transpose_i128(%arg0: vector<[1]x[1]xi128>) { %0 = vector.transpose %arg0, [1, 0] : vector<[1]x[1]xi128> to vector<[1]x[1]xi128> "prevent.dce"(%0) : (vector<[1]x[1]xi128>) -> () @@ -536,7 +536,7 @@ func.func @transpose_i128(%arg0: vector<[1]x[1]xi128>) { // CHECK-LABEL: @transpose_f16 // CHECK: arith.constant 8 // CHECK: arm_sme.tile_store {{.*}} : memref, vector<[8]x[8]xf16> -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[8]x[8]xf16> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[8]x[8]xf16> func.func @transpose_f16(%arg0: vector<[8]x[8]xf16>) { %0 = vector.transpose %arg0, [1, 0] : vector<[8]x[8]xf16> to vector<[8]x[8]xf16> "prevent.dce"(%0) : (vector<[8]x[8]xf16>) -> () @@ -548,7 +548,7 @@ func.func @transpose_f16(%arg0: vector<[8]x[8]xf16>) { // CHECK-LABEL: @transpose_bf16 // CHECK: arith.constant 8 // CHECK: arm_sme.tile_store {{.*}} : memref, vector<[8]x[8]xbf16> -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[8]x[8]xbf16> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[8]x[8]xbf16> func.func @transpose_bf16(%arg0: vector<[8]x[8]xbf16>) { %0 = vector.transpose %arg0, [1, 0] : vector<[8]x[8]xbf16> to vector<[8]x[8]xbf16> "prevent.dce"(%0) : (vector<[8]x[8]xbf16>) -> () @@ -560,7 +560,7 @@ func.func @transpose_bf16(%arg0: vector<[8]x[8]xbf16>) { // CHECK-LABEL: 
@transpose_f32 // CHECK: arith.constant 4 // CHECK: arm_sme.tile_store {{.*}} : memref, vector<[4]x[4]xf32> -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[4]x[4]xf32> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[4]x[4]xf32> func.func @transpose_f32(%arg0: vector<[4]x[4]xf32>) { %0 = vector.transpose %arg0, [1, 0] : vector<[4]x[4]xf32> to vector<[4]x[4]xf32> "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> () @@ -572,7 +572,7 @@ func.func @transpose_f32(%arg0: vector<[4]x[4]xf32>) { // CHECK-LABEL: @transpose_f64 // CHECK: arith.constant 2 // CHECK: arm_sme.tile_store {{.*}} : memref, vector<[2]x[2]xf64> -// CHECK: arm_sme.tile_load {{.*}}, : memref, vector<[2]x[2]xf64> +// CHECK: arm_sme.tile_load {{.*}} layout : memref, vector<[2]x[2]xf64> func.func @transpose_f64(%arg0: vector<[2]x[2]xf64>) { %0 = vector.transpose %arg0, [1, 0] : vector<[2]x[2]xf64> to vector<[2]x[2]xf64> "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> () diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir index 8c7d8c954d384..179e9fa83662e 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir @@ -63,7 +63,7 @@ func.func @entry() { } // Load tile from "mem1" vertically. - %0 = arm_sme.tile_load %mem1[%c0, %c0], : memref, vector<[4]x[4]xi32> + %0 = arm_sme.tile_load %mem1[%c0, %c0] layout : memref, vector<[4]x[4]xi32> // 1. ORIGINAL HORIZONTAL LAYOUT // Dump "mem1". 
The smallest SVL is 128-bits so the tile will be at least From dad563e3c223a4276c00407eb8fb48dc702540c1 Mon Sep 17 00:00:00 2001 From: chuongg3 Date: Mon, 16 Oct 2023 11:02:03 +0100 Subject: [PATCH 209/720] [AArch64][GlobalISel] Add legalization for G_VECREDUCE_MUL (#68398) --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 7 + .../GlobalISel/legalizer-info-validation.mir | 4 +- llvm/test/CodeGen/AArch64/aarch64-mulv.ll | 595 ++++++++++++++++++ 3 files changed, 604 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/aarch64-mulv.ll diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index d2f855f407530..ddc27bebb7676 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -895,6 +895,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampMaxNumElements(1, s16, 8) .lower(); + getActionDefinitionsBuilder(G_VECREDUCE_MUL) + .clampMaxNumElements(1, s32, 2) + .clampMaxNumElements(1, s16, 4) + .clampMaxNumElements(1, s8, 8) + .scalarize(1) + .lower(); + getActionDefinitionsBuilder( {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR}) // Try to break down into smaller vectors as long as they're at least 64 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 70114f83e8dd6..549f36b2afd06 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -754,8 +754,8 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_VECREDUCE_MUL (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. 
type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_VECREDUCE_AND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll new file mode 100644 index 0000000000000..995023e80c44b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll @@ -0,0 +1,595 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK_GI: warning: Instruction selection used fallback path for mulv_v3i64 + +declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.mul.v3i8(<3 x i8>) +declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>) +declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.mul.v3i16(<3 x i16>) +declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>) +declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.mul.v3i32(<3 x i32>) +declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) +declare i64 
@llvm.vector.reduce.mul.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.mul.v3i64(<3 x i64>) +declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>) +declare i128 @llvm.vector.reduce.mul.v2i128(<2 x i128>) + +define i8 @mulv_v2i8(<2 x i8> %a) { +; CHECK-SD-LABEL: mulv_v2i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: mul w0, w9, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v2i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> %a) + ret i8 %arg1 +} + +define i8 @mulv_v3i8(<3 x i8> %a) { +; CHECK-LABEL: mulv_v3i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul w8, w0, w1 +; CHECK-NEXT: mul w0, w8, w2 +; CHECK-NEXT: ret +entry: + %arg1 = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %a) + ret i8 %arg1 +} + +define i8 @mulv_v4i8(<4 x i8> %a) { +; CHECK-SD-LABEL: mulv_v4i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: umov w8, v0.h[1] +; CHECK-SD-NEXT: umov w9, v0.h[0] +; CHECK-SD-NEXT: umov w10, v0.h[2] +; CHECK-SD-NEXT: mul w8, w9, w8 +; CHECK-SD-NEXT: umov w9, v0.h[3] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: mul w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v4i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov h3, v0.h[3] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: fmov w11, s3 +; CHECK-GI-NEXT: mul w8, w8, w9 +; CHECK-GI-NEXT: mul w9, w10, w11 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 
= call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %a) + ret i8 %arg1 +} + +define i8 @mulv_v8i8(<8 x i8> %a) { +; CHECK-SD-LABEL: mulv_v8i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: umov w8, v0.b[1] +; CHECK-SD-NEXT: umov w9, v0.b[0] +; CHECK-SD-NEXT: umov w10, v0.b[2] +; CHECK-SD-NEXT: mul w8, w9, w8 +; CHECK-SD-NEXT: umov w9, v0.b[3] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: umov w10, v0.b[4] +; CHECK-SD-NEXT: mul w8, w8, w9 +; CHECK-SD-NEXT: umov w9, v0.b[5] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: umov w10, v0.b[6] +; CHECK-SD-NEXT: mul w8, w8, w9 +; CHECK-SD-NEXT: umov w9, v0.b[7] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: mul w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v8i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: mov b4, v0.b[4] +; CHECK-GI-NEXT: mov b5, v0.b[5] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: mov b6, v0.b[6] +; CHECK-GI-NEXT: mov b7, v0.b[7] +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: fmov w11, s3 +; CHECK-GI-NEXT: fmov w12, s5 +; CHECK-GI-NEXT: mul w8, w8, w9 +; CHECK-GI-NEXT: fmov w9, s4 +; CHECK-GI-NEXT: mul w10, w10, w11 +; CHECK-GI-NEXT: fmov w11, s6 +; CHECK-GI-NEXT: mul w9, w9, w12 +; CHECK-GI-NEXT: fmov w12, s7 +; CHECK-GI-NEXT: mul w8, w8, w10 +; CHECK-GI-NEXT: mul w11, w11, w12 +; CHECK-GI-NEXT: mul w9, w9, w11 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %a) + ret i8 %arg1 +} + +define i8 @mulv_v16i8(<16 x i8> %a) { +; CHECK-SD-LABEL: mulv_v16i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: mul v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: umov w8, v0.b[1] +; CHECK-SD-NEXT: umov w9, v0.b[0] +; CHECK-SD-NEXT: 
umov w10, v0.b[2] +; CHECK-SD-NEXT: mul w8, w9, w8 +; CHECK-SD-NEXT: umov w9, v0.b[3] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: umov w10, v0.b[4] +; CHECK-SD-NEXT: mul w8, w8, w9 +; CHECK-SD-NEXT: umov w9, v0.b[5] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: umov w10, v0.b[6] +; CHECK-SD-NEXT: mul w8, w8, w9 +; CHECK-SD-NEXT: umov w9, v0.b[7] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: mul w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v16i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: mov b4, v0.b[4] +; CHECK-GI-NEXT: mov b5, v0.b[5] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: mov b6, v0.b[6] +; CHECK-GI-NEXT: mov b7, v0.b[7] +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: fmov w11, s3 +; CHECK-GI-NEXT: fmov w12, s5 +; CHECK-GI-NEXT: mul w8, w8, w9 +; CHECK-GI-NEXT: fmov w9, s4 +; CHECK-GI-NEXT: mul w10, w10, w11 +; CHECK-GI-NEXT: fmov w11, s6 +; CHECK-GI-NEXT: mul w9, w9, w12 +; CHECK-GI-NEXT: fmov w12, s7 +; CHECK-GI-NEXT: mul w8, w8, w10 +; CHECK-GI-NEXT: mul w11, w11, w12 +; CHECK-GI-NEXT: mul w9, w9, w11 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %a) + ret i8 %arg1 +} + +define i8 @mulv_v32i8(<32 x i8> %a) { +; CHECK-SD-LABEL: mulv_v32i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mul v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: mul v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: umov w8, v0.b[1] +; CHECK-SD-NEXT: umov w9, v0.b[0] +; CHECK-SD-NEXT: umov w10, v0.b[2] +; CHECK-SD-NEXT: mul w8, w9, w8 +; CHECK-SD-NEXT: umov w9, v0.b[3] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: umov w10, v0.b[4] +; CHECK-SD-NEXT: mul w8, w8, w9 +; CHECK-SD-NEXT: umov w9, v0.b[5] +; CHECK-SD-NEXT: 
mul w8, w8, w10 +; CHECK-SD-NEXT: umov w10, v0.b[6] +; CHECK-SD-NEXT: mul w8, w8, w9 +; CHECK-SD-NEXT: umov w9, v0.b[7] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: mul w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v32i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: mul v0.8b, v0.8b, v2.8b +; CHECK-GI-NEXT: mul v1.8b, v1.8b, v3.8b +; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: mov b4, v0.b[4] +; CHECK-GI-NEXT: mov b5, v0.b[5] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: mov b6, v0.b[6] +; CHECK-GI-NEXT: mov b7, v0.b[7] +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: fmov w11, s3 +; CHECK-GI-NEXT: fmov w12, s5 +; CHECK-GI-NEXT: mul w8, w8, w9 +; CHECK-GI-NEXT: fmov w9, s4 +; CHECK-GI-NEXT: mul w10, w10, w11 +; CHECK-GI-NEXT: fmov w11, s6 +; CHECK-GI-NEXT: mul w9, w9, w12 +; CHECK-GI-NEXT: fmov w12, s7 +; CHECK-GI-NEXT: mul w8, w8, w10 +; CHECK-GI-NEXT: mul w11, w11, w12 +; CHECK-GI-NEXT: mul w9, w9, w11 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %a) + ret i8 %arg1 +} + +define i16 @mulv_v2i16(<2 x i16> %a) { +; CHECK-SD-LABEL: mulv_v2i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: mul w0, w9, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v2i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> %a) + ret i16 %arg1 +} + +define i16 @mulv_v3i16(<3 x i16> %a) { +; 
CHECK-SD-LABEL: mulv_v3i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: umov w8, v0.h[1] +; CHECK-SD-NEXT: umov w9, v0.h[0] +; CHECK-SD-NEXT: umov w10, v0.h[2] +; CHECK-SD-NEXT: mul w8, w9, w8 +; CHECK-SD-NEXT: mul w0, w8, w10 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v3i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: mul w8, w8, w9 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> %a) + ret i16 %arg1 +} + +define i16 @mulv_v4i16(<4 x i16> %a) { +; CHECK-SD-LABEL: mulv_v4i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: umov w8, v0.h[1] +; CHECK-SD-NEXT: umov w9, v0.h[0] +; CHECK-SD-NEXT: umov w10, v0.h[2] +; CHECK-SD-NEXT: mul w8, w9, w8 +; CHECK-SD-NEXT: umov w9, v0.h[3] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: mul w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v4i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov h3, v0.h[3] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: fmov w11, s3 +; CHECK-GI-NEXT: mul w8, w8, w9 +; CHECK-GI-NEXT: mul w9, w10, w11 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %a) + ret i16 %arg1 +} + +define i16 @mulv_v8i16(<8 x i16> %a) { +; CHECK-SD-LABEL: mulv_v8i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: umov w8, v0.h[1] +; CHECK-SD-NEXT: 
umov w9, v0.h[0] +; CHECK-SD-NEXT: umov w10, v0.h[2] +; CHECK-SD-NEXT: mul w8, w9, w8 +; CHECK-SD-NEXT: umov w9, v0.h[3] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: mul w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v8i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov h3, v0.h[3] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: fmov w11, s3 +; CHECK-GI-NEXT: mul w8, w8, w9 +; CHECK-GI-NEXT: mul w9, w10, w11 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %a) + ret i16 %arg1 +} + +define i16 @mulv_v16i16(<16 x i16> %a) { +; CHECK-SD-LABEL: mulv_v16i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mul v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: umov w8, v0.h[1] +; CHECK-SD-NEXT: umov w9, v0.h[0] +; CHECK-SD-NEXT: umov w10, v0.h[2] +; CHECK-SD-NEXT: mul w8, w9, w8 +; CHECK-SD-NEXT: umov w9, v0.h[3] +; CHECK-SD-NEXT: mul w8, w8, w10 +; CHECK-SD-NEXT: mul w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v16i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: mul v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mul v1.4h, v1.4h, v3.4h +; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov h3, v0.h[3] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: mul w8, w8, w9 +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mul w9, w10, w9 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %a) + ret i16 %arg1 +} + 
+define i32 @mulv_v2i32(<2 x i32> %a) { +; CHECK-SD-LABEL: mulv_v2i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: mul w0, w9, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v2i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a) + ret i32 %arg1 +} + +define i32 @mulv_v3i32(<3 x i32> %a) { +; CHECK-LABEL: mulv_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: mov v1.s[3], w8 +; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mul w0, w9, w8 +; CHECK-NEXT: ret +entry: + %arg1 = call i32 @llvm.vector.reduce.mul.v3i32(<3 x i32> %a) + ret i32 %arg1 +} + +define i32 @mulv_v4i32(<4 x i32> %a) { +; CHECK-SD-LABEL: mulv_v4i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: mul w0, w9, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v4i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a) + ret i32 %arg1 +} + +define i32 @mulv_v8i32(<8 x i32> %a) { +; CHECK-SD-LABEL: mulv_v8i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: ext v1.16b, 
v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: mul w0, w9, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v8i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: mul v0.2s, v0.2s, v2.2s +; CHECK-GI-NEXT: mul v1.2s, v1.2s, v3.2s +; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: mul w0, w8, w9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a) + ret i32 %arg1 +} + +define i64 @mulv_v2i64(<2 x i64> %a) { +; CHECK-SD-LABEL: mulv_v2i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov x8, v0.d[1] +; CHECK-SD-NEXT: fmov x9, d0 +; CHECK-SD-NEXT: mul x0, x9, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v2i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: fmov x9, d1 +; CHECK-GI-NEXT: mul x0, x8, x9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %a) + ret i64 %arg1 +} + +define i64 @mulv_v3i64(<3 x i64> %a) { +; CHECK-SD-LABEL: mulv_v3i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fmov x8, d2 +; CHECK-SD-NEXT: fmov x9, d0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: mul x8, x9, x8 +; CHECK-SD-NEXT: fmov x9, d1 +; CHECK-SD-NEXT: mul x0, x9, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: fmov x9, d1 +; CHECK-GI-NEXT: mul x8, x8, x9 +; CHECK-GI-NEXT: fmov x9, d2 +; CHECK-GI-NEXT: mul x0, x8, x9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i64 @llvm.vector.reduce.mul.v3i64(<3 x i64> %a) + ret i64 %arg1 +} + 
+define i64 @mulv_v4i64(<4 x i64> %a) { +; CHECK-SD-LABEL: mulv_v4i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov x8, v1.d[1] +; CHECK-SD-NEXT: mov x9, v0.d[1] +; CHECK-SD-NEXT: fmov x10, d0 +; CHECK-SD-NEXT: mul x8, x9, x8 +; CHECK-SD-NEXT: fmov x9, d1 +; CHECK-SD-NEXT: mul x9, x10, x9 +; CHECK-SD-NEXT: mul x0, x9, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v4i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: fmov x9, d2 +; CHECK-GI-NEXT: fmov x10, d3 +; CHECK-GI-NEXT: mul x8, x8, x9 +; CHECK-GI-NEXT: fmov x9, d1 +; CHECK-GI-NEXT: mul x9, x9, x10 +; CHECK-GI-NEXT: mul x0, x8, x9 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %a) + ret i64 %arg1 +} + +define i128 @mulv_v2i128(<2 x i128> %a) { +; CHECK-SD-LABEL: mulv_v2i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: umulh x8, x0, x2 +; CHECK-SD-NEXT: madd x8, x0, x3, x8 +; CHECK-SD-NEXT: mul x0, x0, x2 +; CHECK-SD-NEXT: madd x1, x1, x2, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mulv_v2i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mul x9, x0, x3 +; CHECK-GI-NEXT: umulh x8, x0, x2 +; CHECK-GI-NEXT: madd x9, x1, x2, x9 +; CHECK-GI-NEXT: mul x0, x0, x2 +; CHECK-GI-NEXT: add x1, x9, x8 +; CHECK-GI-NEXT: ret +entry: + %arg1 = call i128 @llvm.vector.reduce.mul.v2i128(<2 x i128> %a) + ret i128 %arg1 +} From 1d43096e16ff7288c7feac1ae81fd4f745ce10bb Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 16 Oct 2023 12:03:49 +0200 Subject: [PATCH 210/720] [ConstraintElim] Don't decompose values wider than 64 bits (#68803) Our coefficients are 64-bits, so adding/multiplying them can wrap in 64-bits even if there would be no wrapping the full bit width. The alternative would be to check for overflows during all adds/muls in decomposition. I assume that we don't particularly care about handling wide integers here, so I've opted to bail out. 
Fixes https://github.com/llvm/llvm-project/issues/68751. --- .../Transforms/Scalar/ConstraintElimination.cpp | 16 +++++++++++++--- .../ConstraintElimination/large-constant-ints.ll | 8 +++++--- .../test/Transforms/ConstraintElimination/shl.ll | 3 ++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 1eb7e481d43cd..37f720ec40f4e 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -451,6 +451,19 @@ static Decomposition decompose(Value *V, return ResA; }; + Type *Ty = V->getType()->getScalarType(); + if (Ty->isPointerTy() && !IsSigned) { + if (auto *GEP = dyn_cast(V)) + return decomposeGEP(*GEP, Preconditions, IsSigned, DL); + return V; + } + + // Don't handle integers > 64 bit. Our coefficients are 64-bit large, so + // coefficient add/mul may wrap, while the operation in the full bit width + // would not. + if (!Ty->isIntegerTy() || Ty->getIntegerBitWidth() > 64) + return V; + // Decompose \p V used with a signed predicate. if (IsSigned) { if (auto *CI = dyn_cast(V)) { @@ -478,9 +491,6 @@ static Decomposition decompose(Value *V, return int64_t(CI->getZExtValue()); } - if (auto *GEP = dyn_cast(V)) - return decomposeGEP(*GEP, Preconditions, IsSigned, DL); - Value *Op0; bool IsKnownNonNegative = false; if (match(V, m_ZExt(m_Value(Op0)))) { diff --git a/llvm/test/Transforms/ConstraintElimination/large-constant-ints.ll b/llvm/test/Transforms/ConstraintElimination/large-constant-ints.ll index 6b616aa700330..9568b155af13a 100644 --- a/llvm/test/Transforms/ConstraintElimination/large-constant-ints.ll +++ b/llvm/test/Transforms/ConstraintElimination/large-constant-ints.ll @@ -96,6 +96,7 @@ else: ret i1 false } +; TODO: This could be folded. 
define i1 @sub_decomp_i80(i80 %a) { ; CHECK-LABEL: @sub_decomp_i80( ; CHECK-NEXT: entry: @@ -104,7 +105,8 @@ define i1 @sub_decomp_i80(i80 %a) { ; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[SUB_1:%.*]] = sub nuw i80 [[A]], 1973801615886922022913 -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[C_1:%.*]] = icmp ult i80 [[SUB_1]], 1346612317380797267967 +; CHECK-NEXT: ret i1 [[C_1]] ; CHECK: else: ; CHECK-NEXT: ret i1 false ; @@ -418,12 +420,12 @@ entry: ret i1 %res } -; FIXME: This is a miscompile. define i1 @pr68751(i128 %arg) { ; CHECK-LABEL: @pr68751( ; CHECK-NEXT: [[SHL1:%.*]] = shl nuw nsw i128 [[ARG:%.*]], 32 ; CHECK-NEXT: [[SHL2:%.*]] = shl nuw nsw i128 [[SHL1]], 32 -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[SHL2]], 0 +; CHECK-NEXT: ret i1 [[CMP]] ; %shl1 = shl nuw nsw i128 %arg, 32 %shl2 = shl nuw nsw i128 %shl1, 32 diff --git a/llvm/test/Transforms/ConstraintElimination/shl.ll b/llvm/test/Transforms/ConstraintElimination/shl.ll index 982e0e7458333..9f98a9d3a57ca 100644 --- a/llvm/test/Transforms/ConstraintElimination/shl.ll +++ b/llvm/test/Transforms/ConstraintElimination/shl.ll @@ -1277,7 +1277,8 @@ define i1 @shl_55() { ; CHECK-LABEL: @shl_55( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SHL_UB:%.*]] = shl nuw nsw i256 1, 55 -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[SHL_CMP:%.*]] = icmp uge i256 [[SHL_UB]], 1 +; CHECK-NEXT: ret i1 [[SHL_CMP]] ; entry: %shl.ub = shl nuw nsw i256 1, 55 From 17fce286834344a5379288b68068224af74d51f0 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 16 Oct 2023 10:13:47 +0000 Subject: [PATCH 211/720] [lldb][DYLD][NFC] Dedupe calls to CreateBreakpoint These only differ in the modules passed to them. Also I've swapped the if order so we have the "positive" check first. 
--- .../POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp | 31 +++++++------------ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp index 85d7ae9dac75d..c427b476089e4 100644 --- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp @@ -337,29 +337,20 @@ bool DynamicLoaderPOSIXDYLD::SetRendezvousBreakpoint() { }; ModuleSP interpreter = LoadInterpreterModule(); - if (!interpreter) { - FileSpecList containingModules; + FileSpecList containingModules; + if (interpreter) + containingModules.Append(interpreter->GetFileSpec()); + else containingModules.Append( m_process->GetTarget().GetExecutableModulePointer()->GetFileSpec()); - dyld_break = target.CreateBreakpoint( - &containingModules, /*containingSourceFiles=*/nullptr, - DebugStateCandidates, eFunctionNameTypeFull, eLanguageTypeC, - /*m_offset=*/0, - /*skip_prologue=*/eLazyBoolNo, - /*internal=*/true, - /*request_hardware=*/false); - } else { - FileSpecList containingModules; - containingModules.Append(interpreter->GetFileSpec()); - dyld_break = target.CreateBreakpoint( - &containingModules, /*containingSourceFiles=*/nullptr, - DebugStateCandidates, eFunctionNameTypeFull, eLanguageTypeC, - /*m_offset=*/0, - /*skip_prologue=*/eLazyBoolNo, - /*internal=*/true, - /*request_hardware=*/false); - } + dyld_break = target.CreateBreakpoint( + &containingModules, /*containingSourceFiles=*/nullptr, + DebugStateCandidates, eFunctionNameTypeFull, eLanguageTypeC, + /*m_offset=*/0, + /*skip_prologue=*/eLazyBoolNo, + /*internal=*/true, + /*request_hardware=*/false); } if (dyld_break->GetNumResolvedLocations() != 1) { From a72d88fb4f65dd1f6a44f964245ee1002711735b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 16 Oct 2023 12:11:02 +0200 Subject: [PATCH 212/720] Revert "Reapply 
[Verifier] Sanity check alloca size against DILocalVariable fragment size" This reverts commit 8840da2db237cd714d975c199d5992945d2b71e9. This results in verifier failures during LTO, see #68929. --- llvm/lib/IR/Verifier.cpp | 14 -- .../CodeGen/ARM/stack-frame-layout-remarks.ll | 20 +-- llvm/test/CodeGen/BPF/warn-stack.ll | 6 +- .../X86/stack-frame-layout-remarks-64.ll | 76 ---------- ...ks-32.ll => stack-frame-layout-remarks.ll} | 44 ++++-- .../DebugInfo/{X86 => Generic}/PR20038.ll | 5 +- .../DebugInfo/Generic/discriminated-union.ll | 4 +- .../DebugInfo/Generic/dwarf-public-names.ll | 137 ++++++++++++++++++ .../{X86 => Generic}/member-order.ll | 2 +- .../{X86 => Generic}/tu-composite.ll | 2 +- .../Generic/univariant-discriminated-union.ll | 4 +- llvm/test/DebugInfo/X86/fi-piece.ll | 5 +- llvm/test/DebugInfo/invalid-sizes.ll | 80 ---------- llvm/test/Linker/type-unique-odr-a.ll | 4 +- llvm/test/Linker/type-unique-odr-b.ll | 2 - llvm/test/Linker/type-unique-simple2-a.ll | 4 +- llvm/test/Linker/type-unique-simple2-b.ll | 2 - .../dbg-scalable-store-fixed-frag.ll | 6 +- .../InstCombine/dbg-simplify-alloca-size.ll | 4 +- llvm/test/Transforms/Util/dbg-user-of-aext.ll | 8 +- 20 files changed, 204 insertions(+), 225 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/stack-frame-layout-remarks-64.ll rename llvm/test/CodeGen/X86/{stack-frame-layout-remarks-32.ll => stack-frame-layout-remarks.ll} (91%) rename llvm/test/DebugInfo/{X86 => Generic}/PR20038.ll (96%) create mode 100644 llvm/test/DebugInfo/Generic/dwarf-public-names.ll rename llvm/test/DebugInfo/{X86 => Generic}/member-order.ll (96%) rename llvm/test/DebugInfo/{X86 => Generic}/tu-composite.ll (99%) delete mode 100644 llvm/test/DebugInfo/invalid-sizes.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 188e4a4a658f3..5a3328416db3e 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6318,20 +6318,6 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic 
&DII) { CheckDI(isType(Var->getRawType()), "invalid type ref", Var, Var->getRawType()); verifyFnArgs(DII); - - if (auto *Declare = dyn_cast(&DII)) { - if (auto *Alloca = dyn_cast_or_null(Declare->getAddress())) { - DIExpression *Expr = Declare->getExpression(); - std::optional FragSize = Declare->getFragmentSizeInBits(); - std::optional AllocSize = Alloca->getAllocationSizeInBits(DL); - if (FragSize && AllocSize && !AllocSize->isScalable() && - !Expr->isComplex()) { - CheckDI(*FragSize <= AllocSize->getFixedValue(), - "llvm.dbg.declare has larger fragment size than alloca size ", - &DII); - } - } - } } void Verifier::visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI) { diff --git a/llvm/test/CodeGen/ARM/stack-frame-layout-remarks.ll b/llvm/test/CodeGen/ARM/stack-frame-layout-remarks.ll index 53e09216abee5..c76dc24bae7e8 100644 --- a/llvm/test/CodeGen/ARM/stack-frame-layout-remarks.ll +++ b/llvm/test/CodeGen/ARM/stack-frame-layout-remarks.ll @@ -236,19 +236,19 @@ attributes #2 = { ssp "stack-protector-buffer-size"="5" "frame-pointer"="all" } !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !4, splitDebugInlining: false, nameTableKind: None) !3 = !DIFile(filename: "dot.c", directory: "") !4 = !{!5, !6, !10, !13} -!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 32) -!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 32) -!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Array", file: !3, line: 3, size: 64, elements: !8) +!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) +!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) +!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Array", file: !3, line: 3, size: 128, elements: !8) !8 = !{!9, !12} -!9 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !7, file: !3, line: 4, baseType: !10, size: 32) -!10 = 
!DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 32) +!9 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !7, file: !3, line: 4, baseType: !10, size: 64) +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!12 = !DIDerivedType(tag: DW_TAG_member, name: "size", scope: !7, file: !3, line: 5, baseType: !11, size: 32, offset: 32) -!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 32) -!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Result", file: !3, line: 8, size: 64, elements: !15) +!12 = !DIDerivedType(tag: DW_TAG_member, name: "size", scope: !7, file: !3, line: 5, baseType: !11, size: 32, offset: 64) +!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) +!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Result", file: !3, line: 8, size: 128, elements: !15) !15 = !{!16, !17} -!16 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !14, file: !3, line: 9, baseType: !6, size: 32) -!17 = !DIDerivedType(tag: DW_TAG_member, name: "sum", scope: !14, file: !3, line: 10, baseType: !11, size: 32, offset: 32) +!16 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !14, file: !3, line: 9, baseType: !6, size: 64) +!17 = !DIDerivedType(tag: DW_TAG_member, name: "sum", scope: !14, file: !3, line: 10, baseType: !11, size: 32, offset: 64) !18 = !{i32 7, !"Dwarf Version", i32 5} !19 = !{i32 2, !"Debug Info Version", i32 3} !20 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/CodeGen/BPF/warn-stack.ll b/llvm/test/CodeGen/BPF/warn-stack.ll index 14d0f16f0f57b..807e196b926d9 100644 --- a/llvm/test/CodeGen/BPF/warn-stack.ll +++ b/llvm/test/CodeGen/BPF/warn-stack.ll @@ -2,11 +2,11 @@ ;; CHECK-NOT: nowarn define void @nowarn() local_unnamed_addr #0 !dbg !6 { - %1 = alloca [511 x i8], align 1 - call void @llvm.lifetime.start.p0(i64 511, ptr nonnull %1) #4, !dbg !15 + %1 = alloca [504 x 
i8], align 1 + call void @llvm.lifetime.start.p0(i64 504, ptr nonnull %1) #4, !dbg !15 tail call void @llvm.dbg.declare(metadata ptr %1, metadata !10, metadata !16), !dbg !17 call void @doit(ptr nonnull %1) #4, !dbg !18 - call void @llvm.lifetime.end.p0(i64 511, ptr nonnull %1) #4, !dbg !19 + call void @llvm.lifetime.end.p0(i64 504, ptr nonnull %1) #4, !dbg !19 ret void, !dbg !19 } diff --git a/llvm/test/CodeGen/X86/stack-frame-layout-remarks-64.ll b/llvm/test/CodeGen/X86/stack-frame-layout-remarks-64.ll deleted file mode 100644 index f4b00e3132864..0000000000000 --- a/llvm/test/CodeGen/X86/stack-frame-layout-remarks-64.ll +++ /dev/null @@ -1,76 +0,0 @@ -; Test remark output for stack-frame-layout - -; ensure basic output works -; RUN: llc -mcpu=corei7 -O1 -pass-remarks-analysis=stack-frame-layout < %s 2>&1 >/dev/null | FileCheck %s - -; check additional slots are displayed when stack is not optimized -; RUN: llc -mcpu=corei7 -O0 -pass-remarks-analysis=stack-frame-layout < %s 2>&1 >/dev/null | FileCheck %s --check-prefix=NO_COLORING - -target triple = "x86_64-unknown-linux-gnu" - -@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 -declare i32 @printf(ptr, ...) 
- -; CHECK: Function: stackSizeWarning -; CHECK: Offset: [SP-88], Type: Variable, Align: 16, Size: 80 -; CHECK: buffer @ frame-diags.c:30 -; NO_COLORING: Offset: [SP-168], Type: Variable, Align: 16, Size: 80 -; CHECK: buffer2 @ frame-diags.c:33 -define void @stackSizeWarning() { -entry: - %buffer = alloca [80 x i8], align 16 - %buffer2 = alloca [80 x i8], align 16 - call void @llvm.dbg.declare(metadata ptr %buffer, metadata !25, metadata !DIExpression()), !dbg !39 - call void @llvm.dbg.declare(metadata ptr %buffer2, metadata !31, metadata !DIExpression()), !dbg !40 - ret void -} - -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare void @llvm.dbg.declare(metadata, metadata, metadata) #0 - -attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } - -!llvm.dbg.cu = !{!0, !2} -!llvm.module.flags = !{!18, !19, !20, !21, !22, !23, !24} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "frame-diags.c", directory: "") -!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !4, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "dot.c", directory: "") -!4 = !{!5, !6, !10, !13} -!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) -!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) -!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Array", file: !3, line: 3, size: 128, elements: !8) -!8 = !{!9, !12} -!9 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !7, file: !3, line: 4, baseType: !10, size: 64) -!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) -!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!12 = !DIDerivedType(tag: DW_TAG_member, 
name: "size", scope: !7, file: !3, line: 5, baseType: !11, size: 32, offset: 64) -!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) -!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Result", file: !3, line: 8, size: 128, elements: !15) -!15 = !{!16, !17} -!16 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !14, file: !3, line: 9, baseType: !6, size: 64) -!17 = !DIDerivedType(tag: DW_TAG_member, name: "sum", scope: !14, file: !3, line: 10, baseType: !11, size: 32, offset: 64) -!18 = !{i32 7, !"Dwarf Version", i32 5} -!19 = !{i32 2, !"Debug Info Version", i32 3} -!20 = !{i32 1, !"wchar_size", i32 4} -!21 = !{i32 8, !"PIC Level", i32 2} -!22 = !{i32 7, !"PIE Level", i32 2} -!23 = !{i32 7, !"uwtable", i32 2} -!24 = !{i32 7, !"frame-pointer", i32 2} -!25 = !DILocalVariable(name: "buffer", scope: !26, file: !1, line: 30, type: !32) -!26 = distinct !DILexicalBlock(scope: !27, file: !1, line: 29, column: 3) -!27 = distinct !DISubprogram(name: "stackSizeWarning", scope: !1, file: !1, line: 28, type: !28, scopeLine: 28, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !30) -!28 = !DISubroutineType(types: !29) -!29 = !{null} -!30 = !{!25, !31, !36, !37} -!31 = !DILocalVariable(name: "buffer2", scope: !27, file: !1, line: 33, type: !32) -!32 = !DICompositeType(tag: DW_TAG_array_type, baseType: !33, size: 640, elements: !34) -!33 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!34 = !{!35} -!35 = !DISubrange(count: 80) -!36 = !DILocalVariable(name: "a", scope: !27, file: !1, line: 34, type: !11) -!37 = !DILocalVariable(name: "b", scope: !27, file: !1, line: 35, type: !38) -!38 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) -!39 = !DILocation(line: 30, column: 10, scope: !26) -!40 = !DILocation(line: 33, column: 8, scope: !27) diff --git a/llvm/test/CodeGen/X86/stack-frame-layout-remarks-32.ll 
b/llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll similarity index 91% rename from llvm/test/CodeGen/X86/stack-frame-layout-remarks-32.ll rename to llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll index dda5459149167..d32a37efcb5a4 100644 --- a/llvm/test/CodeGen/X86/stack-frame-layout-remarks-32.ll +++ b/llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll @@ -1,16 +1,36 @@ ; Test remark output for stack-frame-layout +; ensure basic output works +; RUN: llc -mcpu=corei7 -O1 -pass-remarks-analysis=stack-frame-layout < %s 2>&1 >/dev/null | FileCheck %s + +; check additional slots are displayed when stack is not optimized +; RUN: llc -mcpu=corei7 -O0 -pass-remarks-analysis=stack-frame-layout < %s 2>&1 >/dev/null | FileCheck %s --check-prefix=NO_COLORING + ; check more complex cases ; RUN: llc %s -pass-remarks-analysis=stack-frame-layout -o /dev/null --march=x86 -mcpu=i386 2>&1 | FileCheck %s --check-prefix=BOTH --check-prefix=DEBUG ; check output without debug info ; RUN: opt %s -passes=strip -S | llc -pass-remarks-analysis=stack-frame-layout -o /dev/null --march=x86 -mcpu=i386 2>&1 | FileCheck %s --check-prefix=BOTH --check-prefix=STRIPPED -target triple = "i386-unknown-linux-gnu" +target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 declare i32 @printf(ptr, ...) 
+; CHECK: Function: stackSizeWarning +; CHECK: Offset: [SP-88], Type: Variable, Align: 16, Size: 80 +; CHECK: buffer @ frame-diags.c:30 +; NO_COLORING: Offset: [SP-168], Type: Variable, Align: 16, Size: 80 +; CHECK: buffer2 @ frame-diags.c:33 +define void @stackSizeWarning() { +entry: + %buffer = alloca [80 x i8], align 16 + %buffer2 = alloca [80 x i8], align 16 + call void @llvm.dbg.declare(metadata ptr %buffer, metadata !25, metadata !DIExpression()), !dbg !39 + call void @llvm.dbg.declare(metadata ptr %buffer2, metadata !31, metadata !DIExpression()), !dbg !40 + ret void +} + ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn declare void @llvm.dbg.declare(metadata, metadata, metadata) #0 @@ -188,7 +208,7 @@ entry: } ; uselistorder directives -uselistorder ptr @llvm.dbg.declare, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 16 } +uselistorder ptr @llvm.dbg.declare, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 18 } attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn } attributes #1 = { "frame-pointer"="all" } @@ -202,19 +222,19 @@ attributes #2 = { ssp "stack-protector-buffer-size"="5" "frame-pointer"="all" } !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !4, splitDebugInlining: false, nameTableKind: None) !3 = !DIFile(filename: "dot.c", directory: "") !4 = !{!5, !6, !10, !13} -!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 32) -!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 32) -!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Array", file: !3, line: 3, size: 64, elements: !8) +!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) +!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) +!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Array", file: !3, line: 3, 
size: 128, elements: !8) !8 = !{!9, !12} -!9 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !7, file: !3, line: 4, baseType: !10, size: 32) -!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 32) +!9 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !7, file: !3, line: 4, baseType: !10, size: 64) +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!12 = !DIDerivedType(tag: DW_TAG_member, name: "size", scope: !7, file: !3, line: 5, baseType: !11, size: 32, offset: 32) -!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 32) -!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Result", file: !3, line: 8, size: 64, elements: !15) +!12 = !DIDerivedType(tag: DW_TAG_member, name: "size", scope: !7, file: !3, line: 5, baseType: !11, size: 32, offset: 64) +!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) +!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Result", file: !3, line: 8, size: 128, elements: !15) !15 = !{!16, !17} -!16 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !14, file: !3, line: 9, baseType: !6, size: 32) -!17 = !DIDerivedType(tag: DW_TAG_member, name: "sum", scope: !14, file: !3, line: 10, baseType: !11, size: 32, offset: 32) +!16 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !14, file: !3, line: 9, baseType: !6, size: 64) +!17 = !DIDerivedType(tag: DW_TAG_member, name: "sum", scope: !14, file: !3, line: 10, baseType: !11, size: 32, offset: 64) !18 = !{i32 7, !"Dwarf Version", i32 5} !19 = !{i32 2, !"Debug Info Version", i32 3} !20 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/DebugInfo/X86/PR20038.ll b/llvm/test/DebugInfo/Generic/PR20038.ll similarity index 96% rename from llvm/test/DebugInfo/X86/PR20038.ll rename to llvm/test/DebugInfo/Generic/PR20038.ll index 0879cd1680fff..024a6abf0591a 100644 --- 
a/llvm/test/DebugInfo/X86/PR20038.ll +++ b/llvm/test/DebugInfo/Generic/PR20038.ll @@ -1,4 +1,7 @@ -; RUN: %llc_dwarf -mtriple=x86_64-unknown-linux-gnu -O0 -filetype=obj -dwarf-linkage-names=All < %s | llvm-dwarfdump -debug-info - | FileCheck %s --implicit-check-not=DW_TAG +; For some reason, the output when targetting sparc is not quite as expected. +; XFAIL: target=sparc{{.*}} + +; RUN: %llc_dwarf -O0 -filetype=obj -dwarf-linkage-names=All < %s | llvm-dwarfdump -debug-info - | FileCheck %s --implicit-check-not=DW_TAG ; IR generated from clang -O0 with: ; struct C { diff --git a/llvm/test/DebugInfo/Generic/discriminated-union.ll b/llvm/test/DebugInfo/Generic/discriminated-union.ll index 6a42fea338039..0acf478f653ae 100644 --- a/llvm/test/DebugInfo/Generic/discriminated-union.ll +++ b/llvm/test/DebugInfo/Generic/discriminated-union.ll @@ -25,14 +25,14 @@ ; CHECK: DW_AT_alignment ; CHECK: DW_AT_data_member_location [DW_FORM_data1] (0x00) -%F = type { [0 x i8], i64, [8 x i8] } +%F = type { [0 x i8], ptr, [8 x i8] } %"F::Nope" = type {} define internal void @_ZN2e34main17h934ff72f9a38d4bbE() unnamed_addr #0 !dbg !5 { start: %qq = alloca %F, align 8 call void @llvm.dbg.declare(metadata ptr %qq, metadata !10, metadata !28), !dbg !29 - store i64 0, ptr %qq, !dbg !29 + store ptr null, ptr %qq, !dbg !29 ret void, !dbg !30 } diff --git a/llvm/test/DebugInfo/Generic/dwarf-public-names.ll b/llvm/test/DebugInfo/Generic/dwarf-public-names.ll new file mode 100644 index 0000000000000..bcb16172b7bcf --- /dev/null +++ b/llvm/test/DebugInfo/Generic/dwarf-public-names.ll @@ -0,0 +1,137 @@ +; RUN: %llc_dwarf -debugger-tune=gdb -filetype=obj -o %t.o < %s +; RUN: llvm-dwarfdump -debug-pubnames %t.o | FileCheck %s +; ModuleID = 'dwarf-public-names.cpp' +; +; Generated from: +; +; struct C { +; void member_function(); +; static int static_member_function(); +; static int static_member_variable; +; }; +; +; int C::static_member_variable = 0; +; +; void C::member_function() { +; 
static_member_variable = 0; +; } +; +; int C::static_member_function() { +; return static_member_variable; +; } +; +; C global_variable; +; +; int global_function() { +; return -1; +; } +; +; namespace ns { +; void global_namespace_function() { +; global_variable.member_function(); +; } +; int global_namespace_variable = 1; +; } + +; Skip the output to the header of the pubnames section. +; CHECK: debug_pubnames +; CHECK: version = 0x0002 + +; Check for each name in the output. +; CHECK-DAG: "ns" +; CHECK-DAG: "C::static_member_function" +; CHECK-DAG: "global_variable" +; CHECK-DAG: "ns::global_namespace_variable" +; CHECK-DAG: "ns::global_namespace_function" +; CHECK-DAG: "global_function" +; CHECK-DAG: "C::static_member_variable" +; CHECK-DAG: "C::member_function" + +source_filename = "test/DebugInfo/Generic/dwarf-public-names.ll" + +%struct.C = type { i8 } + +@_ZN1C22static_member_variableE = global i32 0, align 4, !dbg !0 +@global_variable = global %struct.C zeroinitializer, align 1, !dbg !15 +@_ZN2ns25global_namespace_variableE = global i32 1, align 4, !dbg !17 + +; Function Attrs: nounwind uwtable +define void @_ZN1C15member_functionEv(ptr %this) #0 align 2 !dbg !23 { +entry: + %this.addr = alloca ptr, align 8 + store ptr %this, ptr %this.addr, align 8 + call void @llvm.dbg.declare(metadata ptr %this.addr, metadata !24, metadata !26), !dbg !27 + %this1 = load ptr, ptr %this.addr + store i32 0, ptr @_ZN1C22static_member_variableE, align 4, !dbg !28 + ret void, !dbg !29 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: nounwind uwtable +define i32 @_ZN1C22static_member_functionEv() #0 align 2 !dbg !30 { +entry: + %0 = load i32, ptr @_ZN1C22static_member_variableE, align 4, !dbg !31 + ret i32 %0, !dbg !31 +} + +; Function Attrs: nounwind uwtable +define i32 @_Z15global_functionv() #0 !dbg !32 { +entry: + ret i32 -1, !dbg !33 +} + +; Function Attrs: nounwind uwtable +define void 
@_ZN2ns25global_namespace_functionEv() #0 !dbg !34 { +entry: + call void @_ZN1C15member_functionEv(ptr @global_variable), !dbg !37 + ret void, !dbg !38 +} + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!20} +!llvm.module.flags = !{!22} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = !DIGlobalVariable(name: "static_member_variable", linkageName: "_ZN1C22static_member_variableE", scope: !2, file: !3, line: 7, type: !6, isLocal: false, isDefinition: true, declaration: !5) +!2 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", file: !3, line: 1, size: 8, align: 8, elements: !4) +!3 = !DIFile(filename: "dwarf-public-names.cpp", directory: "/usr2/kparzysz/s.hex/t") +!4 = !{!5, !7, !12} +!5 = !DIDerivedType(tag: DW_TAG_member, name: "static_member_variable", scope: !2, file: !3, line: 4, baseType: !6, flags: DIFlagStaticMember) +!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!7 = !DISubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: false, scopeLine: 2, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !2, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer) +!11 = !{} +!12 = !DISubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", scope: !2, file: !3, line: 3, type: !13, isLocal: false, isDefinition: false, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, retainedNodes: !11) +!13 = !DISubroutineType(types: !14) +!14 = !{!6} +!15 = !DIGlobalVariableExpression(var: !16, expr: !DIExpression()) +!16 = !DIGlobalVariable(name: "global_variable", scope: null, file: !3, line: 17, type: !2, isLocal: false, isDefinition: true) ; previously: invalid 
DW_TAG_base_type +!17 = !DIGlobalVariableExpression(var: !18, expr: !DIExpression()) +!18 = !DIGlobalVariable(name: "global_namespace_variable", linkageName: "_ZN2ns25global_namespace_variableE", scope: !19, file: !3, line: 27, type: !6, isLocal: false, isDefinition: true) +!19 = !DINamespace(name: "ns", scope: null) +!20 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 3.3", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !11, retainedTypes: !11, globals: !21, imports: !11) ; previously: invalid DW_TAG_base_type +!21 = !{!0, !15, !17} +!22 = !{i32 1, !"Debug Info Version", i32 3} +!23 = distinct !DISubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", scope: null, file: !3, line: 9, type: !8, isLocal: false, isDefinition: true, scopeLine: 9, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !20, declaration: !7, retainedNodes: !11) +!24 = !DILocalVariable(name: "this", arg: 1, scope: !23, file: !3, line: 9, type: !25, flags: DIFlagArtificial | DIFlagObjectPointer) +!25 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !2, size: 64, align: 64) +!26 = !DIExpression() +!27 = !DILocation(line: 9, scope: !23) +!28 = !DILocation(line: 10, scope: !23) +!29 = !DILocation(line: 11, scope: !23) +!30 = distinct !DISubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", scope: null, file: !3, line: 13, type: !13, isLocal: false, isDefinition: true, scopeLine: 13, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !20, declaration: !12, retainedNodes: !11) +!31 = !DILocation(line: 14, scope: !30) +!32 = distinct !DISubprogram(name: "global_function", linkageName: "_Z15global_functionv", scope: !3, file: !3, line: 19, type: !13, isLocal: false, isDefinition: true, scopeLine: 19, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !20, retainedNodes: !11) +!33 = !DILocation(line: 20, scope: !32) +!34 = 
distinct !DISubprogram(name: "global_namespace_function", linkageName: "_ZN2ns25global_namespace_functionEv", scope: !19, file: !3, line: 24, type: !35, isLocal: false, isDefinition: true, scopeLine: 24, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !20, retainedNodes: !11) +!35 = !DISubroutineType(types: !36) +!36 = !{null} +!37 = !DILocation(line: 25, scope: !34) +!38 = !DILocation(line: 26, scope: !34) + diff --git a/llvm/test/DebugInfo/X86/member-order.ll b/llvm/test/DebugInfo/Generic/member-order.ll similarity index 96% rename from llvm/test/DebugInfo/X86/member-order.ll rename to llvm/test/DebugInfo/Generic/member-order.ll index 6b39d79ec9b01..a2965cc0dd9b3 100644 --- a/llvm/test/DebugInfo/X86/member-order.ll +++ b/llvm/test/DebugInfo/Generic/member-order.ll @@ -1,4 +1,4 @@ -; RUN: %llc_dwarf -mtriple=x86_64-unknown-linux -filetype=obj -O0 < %s | llvm-dwarfdump -debug-info - | FileCheck %s +; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-info - | FileCheck %s ; generated by clang from: ; struct foo { diff --git a/llvm/test/DebugInfo/X86/tu-composite.ll b/llvm/test/DebugInfo/Generic/tu-composite.ll similarity index 99% rename from llvm/test/DebugInfo/X86/tu-composite.ll rename to llvm/test/DebugInfo/Generic/tu-composite.ll index 9cb4a7ff75ead..bcfe049e7323c 100644 --- a/llvm/test/DebugInfo/X86/tu-composite.ll +++ b/llvm/test/DebugInfo/Generic/tu-composite.ll @@ -1,4 +1,4 @@ -; RUN: %llc_dwarf -mtriple=x86_64-unknown-linux-gnu -filetype=obj -O0 < %s > %t +; RUN: %llc_dwarf -filetype=obj -O0 < %s > %t ; RUN: llvm-dwarfdump -v -debug-info %t | FileCheck %s ; CHECK: [[TYPE:.*]]: DW_TAG_structure_type ; Make sure we correctly handle containing type of a struct being a type identifier. 
diff --git a/llvm/test/DebugInfo/Generic/univariant-discriminated-union.ll b/llvm/test/DebugInfo/Generic/univariant-discriminated-union.ll index a1849c715fff4..628c2f8b1dec9 100644 --- a/llvm/test/DebugInfo/Generic/univariant-discriminated-union.ll +++ b/llvm/test/DebugInfo/Generic/univariant-discriminated-union.ll @@ -12,14 +12,14 @@ ; CHECK: DW_AT_alignment ; CHECK: DW_AT_data_member_location [DW_FORM_data1] (0x00) -%F = type { [0 x i8], i64, [8 x i8] } +%F = type { [0 x i8], ptr, [8 x i8] } %"F::Nope" = type {} define internal void @_ZN2e34main17h934ff72f9a38d4bbE() unnamed_addr #0 !dbg !5 { start: %qq = alloca %F, align 8 call void @llvm.dbg.declare(metadata ptr %qq, metadata !10, metadata !28), !dbg !29 - store i64 0, ptr %qq, !dbg !29 + store ptr null, ptr %qq, !dbg !29 ret void, !dbg !30 } diff --git a/llvm/test/DebugInfo/X86/fi-piece.ll b/llvm/test/DebugInfo/X86/fi-piece.ll index b763b4f1264e5..30144d7875550 100644 --- a/llvm/test/DebugInfo/X86/fi-piece.ll +++ b/llvm/test/DebugInfo/X86/fi-piece.ll @@ -5,7 +5,7 @@ ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_abstract_origin ; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_fbreg -8, DW_OP_piece 0x2, DW_OP_fbreg -12, DW_OP_piece 0x2) +; CHECK-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_fbreg -4, DW_OP_piece 0x2, DW_OP_fbreg -8, DW_OP_piece 0x2) ; CHECK-NEXT: DW_AT_abstract_origin {{.*}}"a" ; Inlined variable, not to be merged. 
; CHECK-NOT: DW_TAG @@ -25,11 +25,10 @@ define void @f() #0 !dbg !8 { entry: %a = alloca i16, align 4 %b = alloca i16, align 4 - %c = alloca { i16, i16 }, align 4 call void @llvm.dbg.declare(metadata ptr %a, metadata !11, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 16)), !dbg !14 store i16 1, ptr %a, align 4, !dbg !14 call void @llvm.dbg.declare(metadata ptr %b, metadata !11, metadata !DIExpression(DW_OP_LLVM_fragment, 16, 16)), !dbg !16 - call void @llvm.dbg.declare(metadata ptr %c, metadata !11, metadata !13), !dbg !17 + call void @llvm.dbg.declare(metadata ptr %a, metadata !11, metadata !13), !dbg !17 store i16 2, ptr %b, align 4, !dbg !17 ret void } diff --git a/llvm/test/DebugInfo/invalid-sizes.ll b/llvm/test/DebugInfo/invalid-sizes.ll deleted file mode 100644 index 6562f708d08a4..0000000000000 --- a/llvm/test/DebugInfo/invalid-sizes.ll +++ /dev/null @@ -1,80 +0,0 @@ -; RUN: llvm-as -disable-output < %s 2>&1 | FileCheck %s - -; CHECK: llvm.dbg.declare has larger fragment size than alloca size -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %slice.dbg.spill, metadata !23, metadata !DIExpression()) -; CHECK: llvm.dbg.declare has larger fragment size than alloca size -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %slice.dbg.spill1, metadata !23, metadata !DIExpression()) - -%"EndianSlice<'_>" = type { { ptr, i64 }, i32, [1 x i32] } - -; example::test -; Function Attrs: nonlazybind uwtable -define void @_ZN7example4test17h64a501af0fe536ddE(ptr align 1 %s.0, i64 %s.1) unnamed_addr #0 !dbg !7 { -start: - %slice.dbg.spill1 = alloca i32, align 4 - %slice.dbg.spill = alloca { ptr, i64 }, align 8 - %s.dbg.spill = alloca { ptr, i64 }, align 8 - %_2 = alloca %"EndianSlice<'_>", align 8 - %0 = getelementptr inbounds { ptr, i64 }, ptr %s.dbg.spill, i32 0, i32 0 - store ptr %s.0, ptr %0, align 8 - %1 = getelementptr inbounds { ptr, i64 }, ptr %s.dbg.spill, i32 0, i32 1 - store i64 %s.1, ptr %1, align 8 - call void @llvm.dbg.declare(metadata ptr 
%s.dbg.spill, metadata !22, metadata !DIExpression()), !dbg !33 - %2 = getelementptr inbounds { ptr, i64 }, ptr %slice.dbg.spill, i32 0, i32 0, !dbg !34 - store ptr %s.0, ptr %2, align 8, !dbg !34 - %3 = getelementptr inbounds { ptr, i64 }, ptr %slice.dbg.spill, i32 0, i32 1, !dbg !34 - store i64 %s.1, ptr %3, align 8, !dbg !34 - call void @llvm.dbg.declare(metadata ptr %slice.dbg.spill, metadata !23, metadata !DIExpression()), !dbg !35 - store i32 1, ptr %slice.dbg.spill1, align 4, !dbg !34 - call void @llvm.dbg.declare(metadata ptr %slice.dbg.spill1, metadata !23, metadata !DIExpression()), !dbg !35 - ret void, !dbg !36 -} - -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -attributes #0 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" } -attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } - -!llvm.module.flags = !{!0, !1, !2, !3} -!llvm.ident = !{!4} -!llvm.dbg.cu = !{!5} - -!0 = !{i32 8, !"PIC Level", i32 2} -!1 = !{i32 2, !"RtLibUseGOT", i32 1} -!2 = !{i32 2, !"Dwarf Version", i32 4} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{!"rustc version 1.74.0-nightly (5c6a7e71c 2023-08-20)"} -!5 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !6, producer: "clang LLVM (rustc version 1.74.0-nightly (5c6a7e71c 2023-08-20))", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false) -!6 = !DIFile(filename: "/app/example.rs/@/example.a6c375ed18e8f6d3-cgu.0", directory: "/app") -!7 = distinct !DISubprogram(name: "test", linkageName: "_ZN7example4test17h64a501af0fe536ddE", scope: !9, file: !8, line: 9, type: !10, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !5, templateParams: !20, retainedNodes: !21) -!8 = !DIFile(filename: "example.rs", directory: "/app", checksumkind: CSK_MD5, checksum: "bd53c9e80c244adbeae5aa0d57de599d") -!9 = 
!DINamespace(name: "example", scope: null) -!10 = !DISubroutineType(types: !11) -!11 = !{null, !12} -!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "&[u8]", file: !13, size: 128, align: 64, elements: !14, templateParams: !20, identifier: "4f7d759e2003ffb713a77bd933fd0146") -!13 = !DIFile(filename: "", directory: "") -!14 = !{!15, !18} -!15 = !DIDerivedType(tag: DW_TAG_member, name: "data_ptr", scope: !12, file: !13, baseType: !16, size: 64, align: 64) -!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 64, align: 64, dwarfAddressSpace: 0) -!17 = !DIBasicType(name: "u8", size: 8, encoding: DW_ATE_unsigned) -!18 = !DIDerivedType(tag: DW_TAG_member, name: "length", scope: !12, file: !13, baseType: !19, size: 64, align: 64, offset: 64) -!19 = !DIBasicType(name: "usize", size: 64, encoding: DW_ATE_unsigned) -!20 = !{} -!21 = !{!22, !23} -!22 = !DILocalVariable(name: "s", arg: 1, scope: !7, file: !8, line: 9, type: !12) -!23 = !DILocalVariable(name: "slice", scope: !24, file: !8, line: 10, type: !25, align: 8) -!24 = distinct !DILexicalBlock(scope: !7, file: !8, line: 10, column: 5) -!25 = !DICompositeType(tag: DW_TAG_structure_type, name: "EndianSlice", scope: !9, file: !13, size: 192, align: 64, elements: !26, templateParams: !20, identifier: "f1b6e593370159e9df4228aa26ace4b5") -!26 = !{!27, !28} -!27 = !DIDerivedType(tag: DW_TAG_member, name: "slice", scope: !25, file: !13, baseType: !12, size: 128, align: 64) -!28 = !DIDerivedType(tag: DW_TAG_member, name: "endian", scope: !25, file: !13, baseType: !29, size: 32, align: 32, offset: 128) -!29 = !DICompositeType(tag: DW_TAG_structure_type, name: "Endian", scope: !9, file: !13, size: 32, align: 32, elements: !30, templateParams: !20, identifier: "a76092aada82685a5b963f3da7ae1bd9") -!30 = !{!31} -!31 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !29, file: !13, baseType: !32, size: 32, align: 32) -!32 = !DIBasicType(name: "i32", size: 32, encoding: DW_ATE_signed) -!33 = 
!DILocation(line: 9, column: 13, scope: !7) -!34 = !DILocation(line: 10, column: 17, scope: !7) -!35 = !DILocation(line: 10, column: 9, scope: !24) -!36 = !DILocation(line: 11, column: 2, scope: !37) -!37 = !DILexicalBlockFile(scope: !7, file: !8, discriminator: 0) diff --git a/llvm/test/Linker/type-unique-odr-a.ll b/llvm/test/Linker/type-unique-odr-a.ll index 831d42b35c2de..9a911c7f2d440 100644 --- a/llvm/test/Linker/type-unique-odr-a.ll +++ b/llvm/test/Linker/type-unique-odr-a.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86-registered-target +; REQUIRES: object-emission ; ; RUN: llvm-link %s %p/type-unique-odr-b.ll -S -o - \ ; RUN: | %llc_dwarf -dwarf-linkage-names=All -filetype=obj -O0 \ @@ -56,8 +56,6 @@ ; ModuleID = 'type-unique-odr-a.cpp' -target triple = "x86_64-unknown-linux-gnu" - %class.A = type { i32 } ; Function Attrs: nounwind diff --git a/llvm/test/Linker/type-unique-odr-b.ll b/llvm/test/Linker/type-unique-odr-b.ll index 92b489a10a7c3..0526b2a6dba94 100644 --- a/llvm/test/Linker/type-unique-odr-b.ll +++ b/llvm/test/Linker/type-unique-odr-b.ll @@ -19,8 +19,6 @@ ; ModuleID = 'type-unique-odr-b.cpp' -target triple = "x86_64-unknown-linux-gnu" - %class.A = type { i32 } ; Function Attrs: nounwind diff --git a/llvm/test/Linker/type-unique-simple2-a.ll b/llvm/test/Linker/type-unique-simple2-a.ll index 28dddc298d04a..1032a43fd629d 100644 --- a/llvm/test/Linker/type-unique-simple2-a.ll +++ b/llvm/test/Linker/type-unique-simple2-a.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86-registered-target +; REQUIRES: object-emission ; ; RUN: llvm-link %s %p/type-unique-simple2-b.ll -S -o - | %llc_dwarf -filetype=obj -O0 | llvm-dwarfdump -v -debug-info - | FileCheck %s ; @@ -26,8 +26,6 @@ ; ModuleID = 'a.cpp' -target triple = "x86_64-unknown-linux-gnu" - %class.A = type { ptr } @_ZTV1A = external unnamed_addr constant [4 x ptr] diff --git a/llvm/test/Linker/type-unique-simple2-b.ll b/llvm/test/Linker/type-unique-simple2-b.ll index 0b61b7fb723ee..38263314de449 100644 --- 
a/llvm/test/Linker/type-unique-simple2-b.ll +++ b/llvm/test/Linker/type-unique-simple2-b.ll @@ -10,8 +10,6 @@ ; target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; target triple = "x86_64-apple-macosx10.9.0" -target triple = "x86_64-unknown-linux-gnu" - %class.A = type { ptr } @_ZTV1A = unnamed_addr constant [4 x ptr] [ptr null, ptr @_ZTI1A, ptr @_ZN1A6setFooEv, ptr @_ZN1A6getFooEv] diff --git a/llvm/test/Transforms/InstCombine/dbg-scalable-store-fixed-frag.ll b/llvm/test/Transforms/InstCombine/dbg-scalable-store-fixed-frag.ll index 8743ac6f1473e..a8a7ee4608f65 100644 --- a/llvm/test/Transforms/InstCombine/dbg-scalable-store-fixed-frag.ll +++ b/llvm/test/Transforms/InstCombine/dbg-scalable-store-fixed-frag.ll @@ -4,14 +4,14 @@ define i32 @foo( %x) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARR:%.*]] = alloca [4 x i32], align 4 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARR]], metadata [[META8:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] +; CHECK-NEXT: [[ARR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @llvm.dbg.value(metadata undef, metadata [[META8:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] ; CHECK-NEXT: store [[X:%.*]], ptr [[ARR]], align 4 ; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[ARR]], align 4 ; CHECK-NEXT: ret i32 [[RES]] ; entry: - %arr = alloca [4 x i32], align 4 + %arr = alloca i32, align 4 call void @llvm.dbg.declare(metadata ptr %arr, metadata !8, metadata !DIExpression()), !dbg !14 store %x, ptr %arr, align 4 %res = load i32, ptr %arr diff --git a/llvm/test/Transforms/InstCombine/dbg-simplify-alloca-size.ll b/llvm/test/Transforms/InstCombine/dbg-simplify-alloca-size.ll index 6a807ba378601..028b19fadf197 100644 --- a/llvm/test/Transforms/InstCombine/dbg-simplify-alloca-size.ll +++ b/llvm/test/Transforms/InstCombine/dbg-simplify-alloca-size.ll @@ -7,13 +7,13 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-LABEL: @toplevel( ; CHECK: entry: -; 
CHECK-NEXT: %pixels1 = alloca [4 x i8], align 1 +; CHECK-NEXT: %pixels1 = alloca [3 x i8], align 1 ; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %pixels1, metadata ![[MD:[0-9]+]], metadata !DIExpression()), !dbg ![[DBG:[0-9]+]] ; CHECK-NEXT: call void @foo(ptr nonnull %pixels1) ; CHECK-NEXT: ret void define dso_local void @toplevel() { entry: - %pixels = alloca i8, i32 4 + %pixels = alloca i8, i32 3 call void @llvm.dbg.declare(metadata ptr %pixels, metadata !11, metadata !DIExpression()), !dbg !12 call void @foo(ptr %pixels) ret void diff --git a/llvm/test/Transforms/Util/dbg-user-of-aext.ll b/llvm/test/Transforms/Util/dbg-user-of-aext.ll index 7c9188f5513e1..c91b68a68e899 100644 --- a/llvm/test/Transforms/Util/dbg-user-of-aext.ll +++ b/llvm/test/Transforms/Util/dbg-user-of-aext.ll @@ -3,8 +3,6 @@ ; opposed to the operand of a [s|z]ext). ; RUN: opt -S -passes='sroa' %s | FileCheck %s -target datalayout = "i64:64" - ; Built from: ; struct foo { bool b; long i; }; ; void f(bool b, bool expr, foo g) { @@ -14,15 +12,15 @@ target datalayout = "i64:64" ; Expect two fragments: ; * first starting at bit 0, 8 bits (for the bool) -; * second starting at bit 64, 64 bits (for the long) -; (this happens to create/demonstrate a gap from bits [7, 64)) +; * second starting at bit 32, 32 bits (for the long) +; (this happens to create/demonstrate a gap from bits [7, 32)) ; But also check that a complex expression is not used for a lone bool ; parameter. 
It can reference the register it's in directly without masking off ; high bits or anything ; CHECK: call void @llvm.dbg.value(metadata i8 %g.coerce0, metadata ![[VAR_STRUCT:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 8)) -; CHECK: call void @llvm.dbg.value(metadata i64 %g.coerce1, metadata ![[VAR_STRUCT]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) +; CHECK: call void @llvm.dbg.value(metadata i64 %g.coerce1, metadata ![[VAR_STRUCT]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 64)) ; CHECK: call void @llvm.dbg.value(metadata i8 %frombool, metadata ![[VAR_BOOL:[0-9]+]], metadata !DIExpression()) ; CHECK: call void @llvm.dbg.value(metadata i8 %frombool1, metadata ![[VAR_FRAG:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 4)) From 3b23704f161c3dd89d4a0b637c9008f573cb87c8 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 16 Oct 2023 10:23:01 +0000 Subject: [PATCH 213/720] [lldb][PDB] Fix test build after plugin namespace change This was failing to build on Windows after 1673a1ba5decd907d49e64ef705980a145b891d1. 
--- lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp b/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp index bd69d246072ca..acd381ccad13d 100644 --- a/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp +++ b/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp @@ -53,7 +53,7 @@ class SymbolFilePDBTests : public testing::Test { FileSystem::Initialize(); HostInfo::Initialize(); ObjectFilePECOFF::Initialize(); - SymbolFileDWARF::Initialize(); + plugin::dwarf::SymbolFileDWARF::Initialize(); TypeSystemClang::Initialize(); SymbolFilePDB::Initialize(); @@ -64,7 +64,7 @@ class SymbolFilePDBTests : public testing::Test { void TearDown() override { SymbolFilePDB::Terminate(); TypeSystemClang::Initialize(); - SymbolFileDWARF::Terminate(); + plugin::dwarf::SymbolFileDWARF::Terminate(); ObjectFilePECOFF::Terminate(); HostInfo::Terminate(); FileSystem::Terminate(); From b5743d4798b250506965e07ebab806a3c2d767cc Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 16 Oct 2023 12:45:48 +0200 Subject: [PATCH 214/720] [ValueTracking] Remove by-ref computeKnownBits() overloads (NFC) Remove the old overloads that accept KnownBits by reference, in favor of those that return it by value. 
--- llvm/include/llvm/Analysis/ValueTracking.h | 33 ++++---------- .../Transforms/InstCombine/InstCombiner.h | 5 --- llvm/lib/Analysis/DemandedBits.cpp | 9 ++-- llvm/lib/Analysis/ScalarEvolution.cpp | 5 +-- llvm/lib/Analysis/ValueTracking.cpp | 32 +++----------- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +- .../Hexagon/HexagonLoopIdiomRecognition.cpp | 3 +- .../InstCombine/InstCombineCalls.cpp | 6 +-- .../InstCombine/InstCombineSelect.cpp | 3 +- .../InstCombineSimplifyDemanded.cpp | 44 +++++++++---------- .../Transforms/Utils/BypassSlowDivision.cpp | 4 +- .../Vectorize/LoadStoreVectorizer.cpp | 5 +-- 12 files changed, 48 insertions(+), 105 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 25272e0581c93..191f81e0797c1 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -46,43 +46,26 @@ class Value; constexpr unsigned MaxAnalysisRecursionDepth = 6; -/// Determine which bits of V are known to be either zero or one and return -/// them in the KnownZero/KnownOne bit sets. +/// Determine which bits of V are known to be either zero or one. /// /// This function is defined on values with integer type, values with pointer /// type, and vectors of integers. In the case /// where V is a vector, the known zero and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - -/// Determine which bits of V are known to be either zero or one and return -/// them in the KnownZero/KnownOne bit sets. -/// -/// This function is defined on values with integer type, values with pointer -/// type, and vectors of integers. 
In the case -/// where V is a vector, the known zero and known one values are the -/// same width as the vector element, and the bit is set only if it is true -/// for all of the demanded elements in the vector. -void computeKnownBits(const Value *V, const APInt &DemandedElts, - KnownBits &Known, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - -/// Returns the known bits rather than passing by reference. KnownBits computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, bool UseInstrInfo = true); -/// Returns the known bits rather than passing by reference. +/// Determine which bits of V are known to be either zero or one. +/// +/// This function is defined on values with integer type, values with pointer +/// type, and vectors of integers. In the case +/// where V is a vector, the known zero and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the demanded elements in the vector. KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index dcfcc8f41dd58..09a08d92c368d 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -466,11 +466,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { /// methods should return the value returned by this function. 
virtual Instruction *eraseInstFromFunction(Instruction &I) = 0; - void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, - const Instruction *CxtI) const { - llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT); - } - KnownBits computeKnownBits(const Value *V, unsigned Depth, const Instruction *CxtI) const { return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT); diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp index c5017bf52498e..2c41451b3aab2 100644 --- a/llvm/lib/Analysis/DemandedBits.cpp +++ b/llvm/lib/Analysis/DemandedBits.cpp @@ -70,13 +70,10 @@ void DemandedBits::determineLiveOperandBits( KnownBitsComputed = true; const DataLayout &DL = UserI->getModule()->getDataLayout(); - Known = KnownBits(BitWidth); - computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT); + Known = computeKnownBits(V1, DL, 0, &AC, UserI, &DT); - if (V2) { - Known2 = KnownBits(BitWidth); - computeKnownBits(V2, Known2, DL, 0, &AC, UserI, &DT); - } + if (V2) + Known2 = computeKnownBits(V2, DL, 0, &AC, UserI, &DT); }; switch (UserI->getOpcode()) { diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 4850a6aa5625d..d542f82b83ca1 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -7722,9 +7722,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned LZ = A.countl_zero(); unsigned TZ = A.countr_zero(); unsigned BitWidth = A.getBitWidth(); - KnownBits Known(BitWidth); - computeKnownBits(BO->LHS, Known, getDataLayout(), - 0, &AC, nullptr, &DT); + KnownBits Known = + computeKnownBits(BO->LHS, getDataLayout(), 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 82310444326d6..18a2562ec2dce 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -159,25 
+159,6 @@ static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, computeKnownBits(V, DemandedElts, Known, Depth, Q); } -void llvm::computeKnownBits(const Value *V, KnownBits &Known, - const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { - ::computeKnownBits( - V, Known, Depth, - SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); -} - -void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, - KnownBits &Known, const DataLayout &DL, - unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { - ::computeKnownBits( - V, DemandedElts, Known, Depth, - SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); -} - static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, unsigned Depth, const SimplifyQuery &Q); @@ -250,11 +231,9 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, match(LHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) return true; } - IntegerType *IT = cast(LHS->getType()->getScalarType()); - KnownBits LHSKnown(IT->getBitWidth()); - KnownBits RHSKnown(IT->getBitWidth()); - ::computeKnownBits(LHS, LHSKnown, 0, SQ); - ::computeKnownBits(RHS, RHSKnown, 0, SQ); + + KnownBits LHSKnown = ::computeKnownBits(LHS, 0, SQ); + KnownBits RHSKnown = ::computeKnownBits(RHS, 0, SQ); return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown); } @@ -8140,9 +8119,8 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, // If X & C == 0 then (X | C) == X +_{nuw} C if (match(A, m_Or(m_Value(X), m_APInt(CA))) && match(B, m_Or(m_Specific(X), m_APInt(CB)))) { - KnownBits Known(CA->getBitWidth()); - computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr, - /*CxtI*/ nullptr, /*DT*/ nullptr); + KnownBits Known = computeKnownBits(X, DL, Depth + 1, /*AC*/ nullptr, + /*CxtI*/ nullptr, /*DT*/ nullptr); if (CA->isSubsetOf(Known.Zero) && 
CB->isSubsetOf(Known.Zero)) return true; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e831316efff52..9a37627e36b9f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12147,9 +12147,7 @@ MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { const GlobalValue *GV = nullptr; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); - KnownBits Known(PtrWidth); - llvm::computeKnownBits(GV, Known, getDataLayout()); + KnownBits Known = llvm::computeKnownBits(GV, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); if (AlignBits) return commonAlignment(Align(1ull << std::min(31U, AlignBits)), GVOffset); diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 51ef72b873a51..5e54a754a02f3 100644 --- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -1270,8 +1270,7 @@ bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V, if (!T) return false; - KnownBits Known(T->getBitWidth()); - computeKnownBits(V, Known, DL); + KnownBits Known = computeKnownBits(V, DL); return Known.countMinLeadingZeros() >= IterCount; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index e29fb869686ca..88636ff60f5cd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -642,8 +642,7 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { return CastInst::Create(Instruction::ZExt, NarrowPop, Ty); } - KnownBits Known(BitWidth); - IC.computeKnownBits(Op0, Known, 0, &II); + KnownBits Known = IC.computeKnownBits(Op0, 0, &II); // If all 
bits are zero except for exactly one fixed bit, then the result // must be 0 or 1, and we can get that answer by shifting to LSB: @@ -2875,8 +2874,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // If there is a dominating assume with the same condition as this one, // then this one is redundant, and should be removed. - KnownBits Known(1); - computeKnownBits(IIOperand, Known, 0, II); + KnownBits Known = computeKnownBits(IIOperand, 0, II); if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast(*II))) return eraseInstFromFunction(*II); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 7a15c0dee492b..8f15ff178a580 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3749,8 +3749,7 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { // The motivation for this call into value tracking is to take advantage of // the assumption cache, so make sure that is populated. 
if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) { - KnownBits Known(1); - computeKnownBits(CondVal, Known, 0, &SI); + KnownBits Known = computeKnownBits(CondVal, 0, &SI); if (Known.One.isOne()) return replaceInstUsesWith(SI, TrueVal); if (Known.Zero.isOne()) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index be005e61a8d2d..308c462482bc8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -119,7 +119,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, "Value *V, DemandedMask and Known must have same BitWidth"); if (isa(V)) { - computeKnownBits(V, Known, Depth, CxtI); + Known = computeKnownBits(V, Depth, CxtI); return nullptr; } @@ -132,7 +132,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *I = dyn_cast(V); if (!I) { - computeKnownBits(V, Known, Depth, CxtI); + Known = computeKnownBits(V, Depth, CxtI); return nullptr; // Only analyze instructions. } @@ -184,7 +184,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - computeKnownBits(I, Known, Depth, CxtI); + Known = computeKnownBits(I, Depth, CxtI); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. 
@@ -598,7 +598,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return InsertNewInstWith(And1, I->getIterator()); } - computeKnownBits(I, Known, Depth, CxtI); + Known = computeKnownBits(I, Depth, CxtI); break; } case Instruction::Shl: { @@ -660,7 +660,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; } } - computeKnownBits(I, Known, Depth, CxtI); + Known = computeKnownBits(I, Depth, CxtI); } break; } @@ -712,7 +712,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (ShiftAmt) Known.Zero.setHighBits(ShiftAmt); // high bits known zero. } else { - computeKnownBits(I, Known, Depth, CxtI); + Known = computeKnownBits(I, Depth, CxtI); } break; } @@ -775,7 +775,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Known.One |= HighBits; } } else { - computeKnownBits(I, Known, Depth, CxtI); + Known = computeKnownBits(I, Depth, CxtI); } break; } @@ -797,7 +797,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Known = KnownBits::udiv(LHSKnown, KnownBits::makeConstant(*SA), cast(I)->isExact()); } else { - computeKnownBits(I, Known, Depth, CxtI); + Known = computeKnownBits(I, Depth, CxtI); } break; } @@ -837,7 +837,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } } - computeKnownBits(I, Known, Depth, CxtI); + Known = computeKnownBits(I, Depth, CxtI); break; } case Instruction::URem: { @@ -977,7 +977,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } if (!KnownBitsComputed) - computeKnownBits(V, Known, Depth, CxtI); + Known = computeKnownBits(V, Depth, CxtI); break; } } @@ -1007,8 +1007,8 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( // this instruction has a simpler value in that context. 
switch (I->getOpcode()) { case Instruction::And: { - computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); - computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); + RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, CxtI); + LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, CxtI); Known = LHSKnown & RHSKnown; computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); @@ -1027,8 +1027,8 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( break; } case Instruction::Or: { - computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); - computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); + RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, CxtI); + LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, CxtI); Known = LHSKnown | RHSKnown; computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); @@ -1049,8 +1049,8 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( break; } case Instruction::Xor: { - computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); - computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); + RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, CxtI); + LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, CxtI); Known = LHSKnown ^ RHSKnown; computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); @@ -1075,11 +1075,11 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( // If an operand adds zeros to every bit below the highest demanded bit, // that operand doesn't change the result. Return the other side. 
- computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, CxtI); if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); - computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); + LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, CxtI); if (DemandedFromOps.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); @@ -1094,19 +1094,19 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( // If an operand subtracts zeros from every bit below the highest demanded // bit, that operand doesn't change the result. Return the other side. - computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, CxtI); if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); bool NSW = cast(I)->hasNoSignedWrap(); - computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); + LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, CxtI); Known = KnownBits::computeForAddSub(/*Add*/ false, NSW, LHSKnown, RHSKnown); computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); break; } case Instruction::AShr: { // Compute the Known bits to simplify things downstream. - computeKnownBits(I, Known, Depth, CxtI); + Known = computeKnownBits(I, Depth, CxtI); // If this user is only demanding bits that we know, return the known // constant. @@ -1133,7 +1133,7 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( } default: // Compute the Known bits to simplify things downstream. - computeKnownBits(I, Known, Depth, CxtI); + Known = computeKnownBits(I, Depth, CxtI); // If this user is only demanding bits that we know, return the known // constant. 
diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index 73a50b793e6d2..b92df30124526 100644 --- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -234,9 +234,7 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V, unsigned HiBits = LongLen - ShortLen; const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); - KnownBits Known(LongLen); - - computeKnownBits(V, Known, DL); + KnownBits Known = computeKnownBits(V, DL); if (Known.countMinLeadingZeros() >= HiBits) return VALRNG_KNOWN_SHORT; diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 73a8070267192..b97054be2fc98 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -1262,9 +1262,8 @@ std::optional Vectorizer::getConstantOffsetComplexAddrs( if (!Safe) { // When computing known bits, use the GEPs as context instructions, since // they likely are in the same BB as the load/store. - KnownBits Known(BitWidth); - computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, 0, &AC, - ContextInst, &DT); + KnownBits Known = computeKnownBits((IdxDiff.sge(0) ? 
ValA : OpB), DL, 0, + &AC, ContextInst, &DT); APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth()); if (Signed) BitsAllowedToBeSet.clearBit(BitWidth - 1); From 4d6fc88946eec6b2ef1d8a91e3425a8e0a84288b Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Mon, 16 Oct 2023 13:07:56 +0200 Subject: [PATCH 215/720] [AMDGPU] Add patterns for V_CMP_O/U (#69157) Fixes SWDEV-427162 --- llvm/lib/Target/AMDGPU/VOPCInstructions.td | 8 + .../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll | 346 +++++++++++++++ .../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll | 402 ++++++++++++++++++ 3 files changed, 756 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 6fc3d0957dce1..cbea380ab28c0 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -1081,6 +1081,8 @@ multiclass FCMP_Pattern { } } +defm : FCMP_Pattern ; +defm : FCMP_Pattern ; defm : FCMP_Pattern ; defm : FCMP_Pattern ; defm : FCMP_Pattern ; @@ -1088,6 +1090,8 @@ defm : FCMP_Pattern ; defm : FCMP_Pattern ; defm : FCMP_Pattern ; +defm : FCMP_Pattern ; +defm : FCMP_Pattern ; defm : FCMP_Pattern ; defm : FCMP_Pattern ; defm : FCMP_Pattern ; @@ -1110,6 +1114,8 @@ defm : FCMP_Pattern ; defm : FCMP_Pattern ; let OtherPredicates = [HasTrue16BitInsts] in { +defm : FCMP_Pattern ; +defm : FCMP_Pattern ; defm : FCMP_Pattern ; defm : FCMP_Pattern ; defm : FCMP_Pattern ; @@ -1126,6 +1132,8 @@ defm : FCMP_Pattern ; } // End OtherPredicates = [HasTrue16BitInsts] let OtherPredicates = [NotHasTrue16BitInsts] in { +defm : FCMP_Pattern ; +defm : FCMP_Pattern ; defm : FCMP_Pattern ; defm : FCMP_Pattern ; defm : FCMP_Pattern ; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll index 007b52fa3a0c6..5a950d803e9c5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll @@ -494,6 +494,121 @@ define 
amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) { ret void } +define amdgpu_kernel void @v_fcmp_f32_o(ptr addrspace(1) %out, float %src) { +; SDAG-GFX11-LABEL: v_fcmp_f32_o: +; SDAG-GFX11: ; %bb.0: +; SDAG-GFX11-NEXT: s_clause 0x1 +; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX11-NEXT: v_cmp_o_f32_e64 s2, 0x42c80000, s2 +; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; SDAG-GFX11-NEXT: s_nop 0 +; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; SDAG-GFX11-NEXT: s_endpgm +; +; SDAG-GFX10-LABEL: v_fcmp_f32_o: +; SDAG-GFX10: ; %bb.0: +; SDAG-GFX10-NEXT: s_clause 0x1 +; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX10-NEXT: v_cmp_o_f32_e64 s0, 0x42c80000, s4 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0 +; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3] +; SDAG-GFX10-NEXT: s_endpgm +; +; GISEL-GFX11-LABEL: v_fcmp_f32_o: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_clause 0x1 +; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX11-NEXT: v_cmp_o_f32_e64 s2, 0x42c80000, s2 +; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] +; GISEL-GFX11-NEXT: s_nop 0 +; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GISEL-GFX11-NEXT: s_endpgm +; +; GISEL-GFX10-LABEL: v_fcmp_f32_o: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_clause 0x1 +; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 
0x24 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: v_cmp_o_f32_e64 s0, 0x42c80000, s4 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] +; GISEL-GFX10-NEXT: s_endpgm + %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 7) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_fcmp_f32_uo(ptr addrspace(1) %out, float %src) { +; SDAG-GFX11-LABEL: v_fcmp_f32_uo: +; SDAG-GFX11: ; %bb.0: +; SDAG-GFX11-NEXT: s_clause 0x1 +; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX11-NEXT: v_cmp_u_f32_e64 s2, 0x42c80000, s2 +; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; SDAG-GFX11-NEXT: s_nop 0 +; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; SDAG-GFX11-NEXT: s_endpgm +; +; SDAG-GFX10-LABEL: v_fcmp_f32_uo: +; SDAG-GFX10: ; %bb.0: +; SDAG-GFX10-NEXT: s_clause 0x1 +; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX10-NEXT: v_cmp_u_f32_e64 s0, 0x42c80000, s4 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0 +; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3] +; SDAG-GFX10-NEXT: s_endpgm +; +; GISEL-GFX11-LABEL: v_fcmp_f32_uo: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_clause 0x1 +; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX11-NEXT: v_cmp_u_f32_e64 s2, 0x42c80000, s2 +; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX11-NEXT: global_store_b32 v1, 
v0, s[0:1] +; GISEL-GFX11-NEXT: s_nop 0 +; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GISEL-GFX11-NEXT: s_endpgm +; +; GISEL-GFX10-LABEL: v_fcmp_f32_uo: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_clause 0x1 +; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: v_cmp_u_f32_e64 s0, 0x42c80000, s4 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] +; GISEL-GFX10-NEXT: s_endpgm + %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 8) + store i32 %result, ptr addrspace(1) %out + ret void +} define amdgpu_kernel void @v_fcmp_f32_ueq(ptr addrspace(1) %out, float %src) { ; SDAG-GFX11-LABEL: v_fcmp_f32_ueq: @@ -1249,6 +1364,122 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { ret void } +define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { +; SDAG-GFX11-LABEL: v_fcmp_f64_o: +; SDAG-GFX11: ; %bb.0: +; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 +; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 +; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX11-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; SDAG-GFX11-NEXT: s_nop 0 +; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; SDAG-GFX11-NEXT: s_endpgm +; +; SDAG-GFX10-LABEL: v_fcmp_f64_o: +; SDAG-GFX10: ; %bb.0: +; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 +; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX10-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 +; 
SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] +; SDAG-GFX10-NEXT: s_endpgm +; +; GISEL-GFX11-LABEL: v_fcmp_f64_o: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 +; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX11-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] +; GISEL-GFX11-NEXT: s_nop 0 +; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GISEL-GFX11-NEXT: s_endpgm +; +; GISEL-GFX10-LABEL: v_fcmp_f64_o: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 +; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] +; GISEL-GFX10-NEXT: s_endpgm + %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 7) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { +; SDAG-GFX11-LABEL: v_fcmp_f64_uo: +; SDAG-GFX11: ; %bb.0: +; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 +; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 +; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; SDAG-GFX11-NEXT: s_nop 0 +; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; SDAG-GFX11-NEXT: s_endpgm +; +; SDAG-GFX10-LABEL: v_fcmp_f64_uo: +; SDAG-GFX10: 
; %bb.0: +; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 +; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX10-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 +; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] +; SDAG-GFX10-NEXT: s_endpgm +; +; GISEL-GFX11-LABEL: v_fcmp_f64_uo: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 +; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] +; GISEL-GFX11-NEXT: s_nop 0 +; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GISEL-GFX11-NEXT: s_endpgm +; +; GISEL-GFX10-LABEL: v_fcmp_f64_uo: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 +; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] +; GISEL-GFX10-NEXT: s_endpgm + %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 8) + store i32 %result, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_une: ; SDAG-GFX11: ; %bb.0: @@ -2348,6 +2579,121 @@ define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) { ret void } +define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) { +; SDAG-GFX11-LABEL: v_fcmp_f16_o: +; SDAG-GFX11: 
; %bb.0: +; SDAG-GFX11-NEXT: s_clause 0x1 +; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX11-NEXT: v_cmp_o_f16_e64 s2, 0x5640, s2 +; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; SDAG-GFX11-NEXT: s_nop 0 +; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; SDAG-GFX11-NEXT: s_endpgm +; +; SDAG-GFX10-LABEL: v_fcmp_f16_o: +; SDAG-GFX10: ; %bb.0: +; SDAG-GFX10-NEXT: s_clause 0x1 +; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX10-NEXT: v_cmp_o_f16_e64 s0, 0x5640, s4 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0 +; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3] +; SDAG-GFX10-NEXT: s_endpgm +; +; GISEL-GFX11-LABEL: v_fcmp_f16_o: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_clause 0x1 +; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX11-NEXT: v_cmp_o_f16_e64 s2, 0x5640, s2 +; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] +; GISEL-GFX11-NEXT: s_nop 0 +; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GISEL-GFX11-NEXT: s_endpgm +; +; GISEL-GFX10-LABEL: v_fcmp_f16_o: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_clause 0x1 +; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: v_cmp_o_f16_e64 s0, 0x5640, s4 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX10-NEXT: global_store_dword 
v1, v0, s[2:3] +; GISEL-GFX10-NEXT: s_endpgm + %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 7) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) { +; SDAG-GFX11-LABEL: v_fcmp_f16_uo: +; SDAG-GFX11: ; %bb.0: +; SDAG-GFX11-NEXT: s_clause 0x1 +; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX11-NEXT: v_cmp_u_f16_e64 s2, 0x5640, s2 +; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; SDAG-GFX11-NEXT: s_nop 0 +; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; SDAG-GFX11-NEXT: s_endpgm +; +; SDAG-GFX10-LABEL: v_fcmp_f16_uo: +; SDAG-GFX10: ; %bb.0: +; SDAG-GFX10-NEXT: s_clause 0x1 +; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-GFX10-NEXT: v_cmp_u_f16_e64 s0, 0x5640, s4 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0 +; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3] +; SDAG-GFX10-NEXT: s_endpgm +; +; GISEL-GFX11-LABEL: v_fcmp_f16_uo: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_clause 0x1 +; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX11-NEXT: v_cmp_u_f16_e64 s2, 0x5640, s2 +; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] +; GISEL-GFX11-NEXT: s_nop 0 +; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GISEL-GFX11-NEXT: s_endpgm +; +; GISEL-GFX10-LABEL: v_fcmp_f16_uo: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_clause 0x1 +; 
GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: v_cmp_u_f16_e64 s0, 0x5640, s4 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] +; GISEL-GFX10-NEXT: s_endpgm + %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 8) + store i32 %result, ptr addrspace(1) %out + ret void +} define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) { ; SDAG-GFX11-LABEL: v_fcmp_f16_ule: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll index eeff0c57bb461..e2bdcfa6bbddc 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll @@ -546,6 +546,129 @@ define amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) { ret void } +define amdgpu_kernel void @v_fcmp_f32_o(ptr addrspace(1) %out, float %src) { +; GFX11-LABEL: v_fcmp_f32_o: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_o_f32_e64 s[2:3], 0x42c80000, s2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: v_mov_b32_e32 v1, s3 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX9-LABEL: v_fcmp_f32_o: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_o_f32_e64 s[0:1], s4, v0 +; GFX9-NEXT: v_mov_b32_e32 v0, 
s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GFX9-NEXT: s_endpgm +; +; VI-SDAG-LABEL: v_fcmp_f32_o: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cmp_o_f32_e64 s[2:3], s2, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: v_fcmp_f32_o: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cmp_o_f32_e64 s[2:3], s2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-GISEL-NEXT: s_endpgm + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 7) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_fcmp_f32_uo(ptr addrspace(1) %out, float %src) { +; GFX11-LABEL: v_fcmp_f32_uo: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_u_f32_e64 s[2:3], 0x42c80000, s2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: v_mov_b32_e32 v1, s3 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX9-LABEL: 
v_fcmp_f32_uo: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_u_f32_e64 s[0:1], s4, v0 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GFX9-NEXT: s_endpgm +; +; VI-SDAG-LABEL: v_fcmp_f32_uo: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cmp_u_f32_e64 s[2:3], s2, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: v_fcmp_f32_uo: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cmp_u_f32_e64 s[2:3], s2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-GISEL-NEXT: s_endpgm + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 8) + store i64 %result, ptr addrspace(1) %out + ret void +} define amdgpu_kernel void @v_fcmp_f32_ueq(ptr addrspace(1) %out, float %src) { ; GFX11-LABEL: v_fcmp_f32_ueq: @@ -1465,6 +1588,162 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { ret void } +define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { +; GFX11-LABEL: v_fcmp_f64_o: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 
s[0:3], s[0:1], 0x24 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s5, 0x40590000 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: v_mov_b32_e32 v1, s3 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX9-SDAG-LABEL: v_fcmp_f64_o: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 +; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: v_fcmp_f64_o: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-GISEL-NEXT: s_endpgm +; +; VI-SDAG-LABEL: v_fcmp_f64_o: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; 
VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: v_fcmp_f64_o: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s4, 0 +; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-GISEL-NEXT: s_endpgm + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 7) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { +; GFX11-LABEL: v_fcmp_f64_uo: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s5, 0x40590000 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: v_mov_b32_e32 v1, s3 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX9-SDAG-LABEL: v_fcmp_f64_uo: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 +; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] +; GFX9-SDAG-NEXT: s_endpgm +; 
+; GFX9-GISEL-LABEL: v_fcmp_f64_uo: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-GISEL-NEXT: s_endpgm +; +; VI-SDAG-LABEL: v_fcmp_f64_uo: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: v_fcmp_f64_uo: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s4, 0 +; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-GISEL-NEXT: s_endpgm + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 8) + store i64 %result, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_une: ; GFX11: ; %bb.0: @@ -2731,6 +3010,129 @@ 
define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) { ret void } +define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) { +; GFX11-LABEL: v_fcmp_f16_o: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_o_f16_e64 s[2:3], 0x5640, s2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: v_mov_b32_e32 v1, s3 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX9-LABEL: v_fcmp_f16_o: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_o_f16_e64 s[0:1], s4, v0 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GFX9-NEXT: s_endpgm +; +; VI-SDAG-LABEL: v_fcmp_f16_o: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cmp_o_f16_e64 s[2:3], s2, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: v_fcmp_f16_o: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cmp_o_f16_e64 
s[2:3], s2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-GISEL-NEXT: s_endpgm + %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 7) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) { +; GFX11-LABEL: v_fcmp_f16_uo: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_u_f16_e64 s[2:3], 0x5640, s2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: v_mov_b32_e32 v1, s3 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX9-LABEL: v_fcmp_f16_uo: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_u_f16_e64 s[0:1], s4, v0 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GFX9-NEXT: s_endpgm +; +; VI-SDAG-LABEL: v_fcmp_f16_uo: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cmp_u_f16_e64 s[2:3], s2, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; 
VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: v_fcmp_f16_uo: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cmp_u_f16_e64 s[2:3], s2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-GISEL-NEXT: s_endpgm + %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 8) + store i64 %result, ptr addrspace(1) %out + ret void +} define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) { ; GFX11-LABEL: v_fcmp_f16_ule: From d4300154b6e7afff10e6b5f69c244c329ba829f3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 16 Oct 2023 14:03:06 +0200 Subject: [PATCH 216/720] Revert "[ValueTracking] Remove by-ref computeKnownBits() overloads (NFC)" This reverts commit b5743d4798b250506965e07ebab806a3c2d767cc. This causes some minor compile-time impact. Revert for now, better to do the change more gradually. 
--- llvm/include/llvm/Analysis/ValueTracking.h | 33 ++++++++++---- .../Transforms/InstCombine/InstCombiner.h | 5 +++ llvm/lib/Analysis/DemandedBits.cpp | 9 ++-- llvm/lib/Analysis/ScalarEvolution.cpp | 5 ++- llvm/lib/Analysis/ValueTracking.cpp | 32 +++++++++++--- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +- .../Hexagon/HexagonLoopIdiomRecognition.cpp | 3 +- .../InstCombine/InstCombineCalls.cpp | 6 ++- .../InstCombine/InstCombineSelect.cpp | 3 +- .../InstCombineSimplifyDemanded.cpp | 44 +++++++++---------- .../Transforms/Utils/BypassSlowDivision.cpp | 4 +- .../Vectorize/LoadStoreVectorizer.cpp | 5 ++- 12 files changed, 105 insertions(+), 48 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 191f81e0797c1..25272e0581c93 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -46,26 +46,43 @@ class Value; constexpr unsigned MaxAnalysisRecursionDepth = 6; -/// Determine which bits of V are known to be either zero or one. +/// Determine which bits of V are known to be either zero or one and return +/// them in the KnownZero/KnownOne bit sets. /// /// This function is defined on values with integer type, values with pointer /// type, and vectors of integers. In the case /// where V is a vector, the known zero and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -KnownBits computeKnownBits(const Value *V, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - -/// Determine which bits of V are known to be either zero or one. 
+void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Determine which bits of V are known to be either zero or one and return +/// them in the KnownZero/KnownOne bit sets. /// /// This function is defined on values with integer type, values with pointer /// type, and vectors of integers. In the case /// where V is a vector, the known zero and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the demanded elements in the vector. +void computeKnownBits(const Value *V, const APInt &DemandedElts, + KnownBits &Known, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Returns the known bits rather than passing by reference. +KnownBits computeKnownBits(const Value *V, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Returns the known bits rather than passing by reference. KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index 09a08d92c368d..dcfcc8f41dd58 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -466,6 +466,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { /// methods should return the value returned by this function. 
virtual Instruction *eraseInstFromFunction(Instruction &I) = 0; + void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, + const Instruction *CxtI) const { + llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT); + } + KnownBits computeKnownBits(const Value *V, unsigned Depth, const Instruction *CxtI) const { return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT); diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp index 2c41451b3aab2..c5017bf52498e 100644 --- a/llvm/lib/Analysis/DemandedBits.cpp +++ b/llvm/lib/Analysis/DemandedBits.cpp @@ -70,10 +70,13 @@ void DemandedBits::determineLiveOperandBits( KnownBitsComputed = true; const DataLayout &DL = UserI->getModule()->getDataLayout(); - Known = computeKnownBits(V1, DL, 0, &AC, UserI, &DT); + Known = KnownBits(BitWidth); + computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT); - if (V2) - Known2 = computeKnownBits(V2, DL, 0, &AC, UserI, &DT); + if (V2) { + Known2 = KnownBits(BitWidth); + computeKnownBits(V2, Known2, DL, 0, &AC, UserI, &DT); + } }; switch (UserI->getOpcode()) { diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index d542f82b83ca1..4850a6aa5625d 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -7722,8 +7722,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned LZ = A.countl_zero(); unsigned TZ = A.countr_zero(); unsigned BitWidth = A.getBitWidth(); - KnownBits Known = - computeKnownBits(BO->LHS, getDataLayout(), 0, &AC, nullptr, &DT); + KnownBits Known(BitWidth); + computeKnownBits(BO->LHS, Known, getDataLayout(), + 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 18a2562ec2dce..82310444326d6 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -159,6 
+159,25 @@ static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, computeKnownBits(V, DemandedElts, Known, Depth, Q); } +void llvm::computeKnownBits(const Value *V, KnownBits &Known, + const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, bool UseInstrInfo) { + ::computeKnownBits( + V, Known, Depth, + SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); +} + +void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, + KnownBits &Known, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT, + bool UseInstrInfo) { + ::computeKnownBits( + V, DemandedElts, Known, Depth, + SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); +} + static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, unsigned Depth, const SimplifyQuery &Q); @@ -231,9 +250,11 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, match(LHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) return true; } - - KnownBits LHSKnown = ::computeKnownBits(LHS, 0, SQ); - KnownBits RHSKnown = ::computeKnownBits(RHS, 0, SQ); + IntegerType *IT = cast(LHS->getType()->getScalarType()); + KnownBits LHSKnown(IT->getBitWidth()); + KnownBits RHSKnown(IT->getBitWidth()); + ::computeKnownBits(LHS, LHSKnown, 0, SQ); + ::computeKnownBits(RHS, RHSKnown, 0, SQ); return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown); } @@ -8119,8 +8140,9 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, // If X & C == 0 then (X | C) == X +_{nuw} C if (match(A, m_Or(m_Value(X), m_APInt(CA))) && match(B, m_Or(m_Specific(X), m_APInt(CB)))) { - KnownBits Known = computeKnownBits(X, DL, Depth + 1, /*AC*/ nullptr, - /*CxtI*/ nullptr, /*DT*/ nullptr); + KnownBits Known(CA->getBitWidth()); + computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr, + /*CxtI*/ nullptr, /*DT*/ nullptr); if (CA->isSubsetOf(Known.Zero) && 
CB->isSubsetOf(Known.Zero)) return true; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9a37627e36b9f..e831316efff52 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12147,7 +12147,9 @@ MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { const GlobalValue *GV = nullptr; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - KnownBits Known = llvm::computeKnownBits(GV, getDataLayout()); + unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); + KnownBits Known(PtrWidth); + llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); if (AlignBits) return commonAlignment(Align(1ull << std::min(31U, AlignBits)), GVOffset); diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 5e54a754a02f3..51ef72b873a51 100644 --- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -1270,7 +1270,8 @@ bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V, if (!T) return false; - KnownBits Known = computeKnownBits(V, DL); + KnownBits Known(T->getBitWidth()); + computeKnownBits(V, Known, DL); return Known.countMinLeadingZeros() >= IterCount; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 88636ff60f5cd..e29fb869686ca 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -642,7 +642,8 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { return CastInst::Create(Instruction::ZExt, NarrowPop, Ty); } - KnownBits Known = IC.computeKnownBits(Op0, 0, &II); + KnownBits Known(BitWidth); + IC.computeKnownBits(Op0, Known, 0, &II); // If all 
bits are zero except for exactly one fixed bit, then the result // must be 0 or 1, and we can get that answer by shifting to LSB: @@ -2874,7 +2875,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // If there is a dominating assume with the same condition as this one, // then this one is redundant, and should be removed. - KnownBits Known = computeKnownBits(IIOperand, 0, II); + KnownBits Known(1); + computeKnownBits(IIOperand, Known, 0, II); if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast(*II))) return eraseInstFromFunction(*II); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 8f15ff178a580..7a15c0dee492b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3749,7 +3749,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { // The motivation for this call into value tracking is to take advantage of // the assumption cache, so make sure that is populated. 
if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) { - KnownBits Known = computeKnownBits(CondVal, 0, &SI); + KnownBits Known(1); + computeKnownBits(CondVal, Known, 0, &SI); if (Known.One.isOne()) return replaceInstUsesWith(SI, TrueVal); if (Known.Zero.isOne()) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 308c462482bc8..be005e61a8d2d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -119,7 +119,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, "Value *V, DemandedMask and Known must have same BitWidth"); if (isa(V)) { - Known = computeKnownBits(V, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); return nullptr; } @@ -132,7 +132,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *I = dyn_cast(V); if (!I) { - Known = computeKnownBits(V, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); return nullptr; // Only analyze instructions. } @@ -184,7 +184,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - Known = computeKnownBits(I, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. 
@@ -598,7 +598,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return InsertNewInstWith(And1, I->getIterator()); } - Known = computeKnownBits(I, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); break; } case Instruction::Shl: { @@ -660,7 +660,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; } } - Known = computeKnownBits(I, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); } break; } @@ -712,7 +712,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (ShiftAmt) Known.Zero.setHighBits(ShiftAmt); // high bits known zero. } else { - Known = computeKnownBits(I, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); } break; } @@ -775,7 +775,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Known.One |= HighBits; } } else { - Known = computeKnownBits(I, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); } break; } @@ -797,7 +797,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Known = KnownBits::udiv(LHSKnown, KnownBits::makeConstant(*SA), cast(I)->isExact()); } else { - Known = computeKnownBits(I, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); } break; } @@ -837,7 +837,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } } - Known = computeKnownBits(I, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); break; } case Instruction::URem: { @@ -977,7 +977,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } if (!KnownBitsComputed) - Known = computeKnownBits(V, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); break; } } @@ -1007,8 +1007,8 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( // this instruction has a simpler value in that context. 
switch (I->getOpcode()) { case Instruction::And: { - RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, CxtI); - LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, CxtI); + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); Known = LHSKnown & RHSKnown; computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); @@ -1027,8 +1027,8 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( break; } case Instruction::Or: { - RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, CxtI); - LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, CxtI); + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); Known = LHSKnown | RHSKnown; computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); @@ -1049,8 +1049,8 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( break; } case Instruction::Xor: { - RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, CxtI); - LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, CxtI); + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); Known = LHSKnown ^ RHSKnown; computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); @@ -1075,11 +1075,11 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( // If an operand adds zeros to every bit below the highest demanded bit, // that operand doesn't change the result. Return the other side. 
- RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, CxtI); + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); - LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); if (DemandedFromOps.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); @@ -1094,19 +1094,19 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( // If an operand subtracts zeros from every bit below the highest demanded // bit, that operand doesn't change the result. Return the other side. - RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, CxtI); + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); bool NSW = cast(I)->hasNoSignedWrap(); - LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); Known = KnownBits::computeForAddSub(/*Add*/ false, NSW, LHSKnown, RHSKnown); computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); break; } case Instruction::AShr: { // Compute the Known bits to simplify things downstream. - Known = computeKnownBits(I, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); // If this user is only demanding bits that we know, return the known // constant. @@ -1133,7 +1133,7 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( } default: // Compute the Known bits to simplify things downstream. - Known = computeKnownBits(I, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); // If this user is only demanding bits that we know, return the known // constant. 
diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index b92df30124526..73a50b793e6d2 100644 --- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -234,7 +234,9 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V, unsigned HiBits = LongLen - ShortLen; const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); - KnownBits Known = computeKnownBits(V, DL); + KnownBits Known(LongLen); + + computeKnownBits(V, Known, DL); if (Known.countMinLeadingZeros() >= HiBits) return VALRNG_KNOWN_SHORT; diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index b97054be2fc98..73a8070267192 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -1262,8 +1262,9 @@ std::optional Vectorizer::getConstantOffsetComplexAddrs( if (!Safe) { // When computing known bits, use the GEPs as context instructions, since // they likely are in the same BB as the load/store. - KnownBits Known = computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), DL, 0, - &AC, ContextInst, &DT); + KnownBits Known(BitWidth); + computeKnownBits((IdxDiff.sge(0) ? 
ValA : OpB), Known, DL, 0, &AC, + ContextInst, &DT); APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth()); if (Signed) BitsAllowedToBeSet.clearBit(BitWidth - 1); From de9b3c5eba41fd024aef6dfa4dab0c8feae29b18 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Mon, 16 Oct 2023 14:11:25 +0200 Subject: [PATCH 217/720] [clang][Interp] Handle delegating constructors (#67823) --- clang/lib/AST/Interp/ByteCodeStmtGen.cpp | 8 ++++++++ clang/test/AST/Interp/records.cpp | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp index 6193a8d55a146..509abe3ae867f 100644 --- a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp @@ -198,6 +198,14 @@ bool ByteCodeStmtGen::visitFunc(const FunctionDecl *F) { return false; if (!this->emitInitPtrPop(InitExpr)) return false; + } else { + assert(Init->isDelegatingInitializer()); + if (!this->emitThis(InitExpr)) + return false; + if (!this->visitInitializer(Init->getInit())) + return false; + if (!this->emitPopPtr(InitExpr)) + return false; } } } diff --git a/clang/test/AST/Interp/records.cpp b/clang/test/AST/Interp/records.cpp index bcc84087fc540..3c866825d1f07 100644 --- a/clang/test/AST/Interp/records.cpp +++ b/clang/test/AST/Interp/records.cpp @@ -1066,3 +1066,26 @@ namespace ParenInit { constexpr B b(A(1),2); } #endif + +namespace DelegatingConstructors { + struct S { + int a; + constexpr S() : S(10) {} + constexpr S(int a) : a(a) {} + }; + constexpr S s = {}; + static_assert(s.a == 10, ""); + + struct B { + int a; + int b; + + constexpr B(int a) : a(a), b(a + 2) {} + }; + struct A : B { + constexpr A() : B(10) {}; + }; + constexpr A d4 = {}; + static_assert(d4.a == 10, ""); + static_assert(d4.b == 12, ""); +} From 499d41cef2e7bbb65804f6a815b9fa8b27efce0f Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Mon, 16 Oct 2023 09:12:53 -0300 Subject: [PATCH 218/720] [flang][OpenMP] Fix 
threadprivate common blocks (#68739) Using a threadprivate common block within a nested scope resulted in compilation errors. This happened because common block names were being first resolved to those in the parent scope. Because of this, in a nested scope, an inner threadprivate directive would be applied to the outter common block. This caused a 'common_block appears in more than one data-sharing clause' error. Also, when a copyin clause in a parallel region tried to use the common block, getting the inner version of it, their objects would be missing the threadprivate attribute, causing a 'Non-THREADPRIVATE object in COPYIN clause' error. Fixes https://github.com/llvm/llvm-project/issues/61200 --- flang/lib/Semantics/resolve-directives.cpp | 19 ++++++------ .../test/Semantics/OpenMP/threadprivate06.f90 | 30 +++++++++++++++++++ 2 files changed, 40 insertions(+), 9 deletions(-) create mode 100644 flang/test/Semantics/OpenMP/threadprivate06.f90 diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 7d7f1ee2d2459..7c8fdb651af9f 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -1947,18 +1947,19 @@ void OmpAttributeVisitor::ResolveOmpNameList( Symbol *OmpAttributeVisitor::ResolveOmpCommonBlockName( const parser::Name *name) { - if (auto *prev{name - ? GetContext().scope.parent().FindCommonBlock(name->source) - : nullptr}) { + if (!name) { + return nullptr; + } + // First check if the Common Block is declared in the current scope + if (auto *cur{GetContext().scope.FindCommonBlock(name->source)}) { + name->symbol = cur; + return cur; + } + // Then check parent scope + if (auto *prev{GetContext().scope.parent().FindCommonBlock(name->source)}) { name->symbol = prev; return prev; } - // Check if the Common Block is declared in the current scope - if (auto *commonBlockSymbol{ - name ? 
GetContext().scope.FindCommonBlock(name->source) : nullptr}) { - name->symbol = commonBlockSymbol; - return commonBlockSymbol; - } return nullptr; } diff --git a/flang/test/Semantics/OpenMP/threadprivate06.f90 b/flang/test/Semantics/OpenMP/threadprivate06.f90 new file mode 100644 index 0000000000000..f31c38f6f2b24 --- /dev/null +++ b/flang/test/Semantics/OpenMP/threadprivate06.f90 @@ -0,0 +1,30 @@ +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp +! OpenMP Version 5.1 +! Check OpenMP construct validity for the following directives: +! 2.21.2 Threadprivate Directive + +program main + call sub1() + print *, 'pass' +end program main + +subroutine sub1() + common /c/ a + !$omp threadprivate(/c/) + integer :: a + + a = 100 + call sub2() + if (a .ne. 101) print *, 'err' + +contains + subroutine sub2() + common /c/ a + !$omp threadprivate(/c/) + integer :: a + + !$omp parallel copyin(/c/) + a = a + 1 + !$omp end parallel + end subroutine +end subroutine From c202a17d024068c70364116f2d06535d79535b30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= Date: Mon, 16 Oct 2023 14:51:05 +0200 Subject: [PATCH 219/720] [clang][analyzer] Move checker alpha.unix.StdCLibraryFunctions out of alpha. 
(#66207) --- clang/docs/ReleaseNotes.rst | 2 + clang/docs/analyzer/checkers.rst | 195 +++++++++--------- .../clang/StaticAnalyzer/Checkers/Checkers.td | 43 ++-- clang/test/Analysis/PR49642.c | 2 +- clang/test/Analysis/analyzer-config.c | 4 +- .../test/Analysis/analyzer-enabled-checkers.c | 1 + clang/test/Analysis/conversion.c | 4 +- .../errno-stdlibraryfunctions-notes.c | 4 +- .../test/Analysis/errno-stdlibraryfunctions.c | 4 +- .../std-c-library-functions-POSIX-lookup.c | 6 +- ...ibrary-functions-POSIX-socket-sockaddr.cpp | 6 +- .../Analysis/std-c-library-functions-POSIX.c | 12 +- ...ry-functions-arg-constraints-note-tags.cpp | 4 +- ...ibrary-functions-arg-constraints-notes.cpp | 4 +- ...functions-arg-constraints-tracking-notes.c | 2 +- .../std-c-library-functions-arg-constraints.c | 8 +- ...td-c-library-functions-arg-constraints.cpp | 2 +- ...library-functions-arg-cstring-dependency.c | 4 +- ...c-library-functions-arg-enabled-checkers.c | 10 +- .../std-c-library-functions-arg-weakdeps.c | 10 +- .../Analysis/std-c-library-functions-eof.c | 10 +- .../std-c-library-functions-inlined.c | 10 +- .../Analysis/std-c-library-functions-lookup.c | 4 +- .../std-c-library-functions-lookup.cpp | 4 +- .../std-c-library-functions-path-notes.c | 4 +- .../std-c-library-functions-restrict.c | 4 +- .../std-c-library-functions-restrict.cpp | 4 +- ...td-c-library-functions-vs-stream-checker.c | 8 +- clang/test/Analysis/std-c-library-functions.c | 12 +- .../test/Analysis/std-c-library-functions.cpp | 2 +- .../test/Analysis/std-c-library-posix-crash.c | 4 +- clang/test/Analysis/stream-errno-note.c | 4 +- clang/test/Analysis/stream-errno.c | 4 +- clang/test/Analysis/stream-noopen.c | 8 +- clang/test/Analysis/stream-note.c | 4 +- .../Analysis/stream-stdlibraryfunctionargs.c | 10 +- clang/test/Analysis/weak-dependencies.c | 2 +- 37 files changed, 214 insertions(+), 211 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 52d5b9a3f66d1..9782c123f4c93 
100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -662,6 +662,8 @@ Static Analyzer - Added a new checker ``core.BitwiseShift`` which reports situations where bitwise shift operators produce undefined behavior (because some operand is negative or too large). +- Move checker ``alpha.unix.StdCLibraryFunctions`` out of the ``alpha`` package + to ``unix.StdCLibraryFunctions``. - Fix false positive in mutation check when using pointer to member function. (`#66204: `_). diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 81f333e644f31..597ffcc4a10a2 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -1016,7 +1016,7 @@ Check the size argument passed into C string functions for common erroneous patt .. _unix-cstring-NullArg: unix.cstring.NullArg (C) -""""""""""""""""""""""""" +"""""""""""""""""""""""" Check for null pointers being passed as arguments to C string functions: ``strlen, strnlen, strcpy, strncpy, strcat, strncat, strcmp, strncmp, strcasecmp, strncasecmp, wcslen, wcsnlen``. @@ -1026,6 +1026,99 @@ Check for null pointers being passed as arguments to C string functions: return strlen(0); // warn } +.. _unix-StdCLibraryFunctions: + +unix.StdCLibraryFunctions (C) +""""""""""""""""""""""""""""" +Check for calls of standard library functions that violate predefined argument +constraints. For example, according to the C standard the behavior of function +``int isalnum(int ch)`` is undefined if the value of ``ch`` is not representable +as ``unsigned char`` and is not equal to ``EOF``. + +You can think of this checker as defining restrictions (pre- and postconditions) +on standard library functions. Preconditions are checked, and when they are +violated, a warning is emitted. Postconditions are added to the analysis, e.g. +that the return value of a function is not greater than 255. 
Preconditions are +added to the analysis too, in the case when the affected values are not known +before the call. + +For example, if an argument to a function must be in between 0 and 255, but the +value of the argument is unknown, the analyzer will assume that it is in this +interval. Similarly, if a function mustn't be called with a null pointer and the +analyzer cannot prove that it is null, then it will assume that it is non-null. + +These are the possible checks on the values passed as function arguments: + - The argument has an allowed range (or multiple ranges) of values. The checker + can detect if a passed value is outside of the allowed range and show the + actual and allowed values. + - The argument has pointer type and is not allowed to be null pointer. Many + (but not all) standard functions can produce undefined behavior if a null + pointer is passed, these cases can be detected by the checker. + - The argument is a pointer to a memory block and the minimal size of this + buffer is determined by another argument to the function, or by + multiplication of two arguments (like at function ``fread``), or is a fixed + value (for example ``asctime_r`` requires at least a buffer of size 26). The + checker can detect if the buffer size is too small and in optimal case show + the size of the buffer and the values of the corresponding arguments. + +.. code-block:: c + + #define EOF -1 + void test_alnum_concrete(int v) { + int ret = isalnum(256); // \ + // warning: Function argument outside of allowed range + (void)ret; + } + + void buffer_size_violation(FILE *file) { + enum { BUFFER_SIZE = 1024 }; + wchar_t wbuf[BUFFER_SIZE]; + + const size_t size = sizeof(*wbuf); // 4 + const size_t nitems = sizeof(wbuf); // 4096 + + // Below we receive a warning because the 3rd parameter should be the + // number of elements to read, not the size in bytes. This case is a known + // vulnerability described by the ARR38-C SEI-CERT rule. 
+ fread(wbuf, size, nitems, file); + } + + int test_alnum_symbolic(int x) { + int ret = isalnum(x); + // after the call, ret is assumed to be in the range [-1, 255] + + if (ret > 255) // impossible (infeasible branch) + if (x == 0) + return ret / x; // division by zero is not reported + return ret; + } + +Additionally to the argument and return value conditions, this checker also adds +state of the value ``errno`` if applicable to the analysis. Many system +functions set the ``errno`` value only if an error occurs (together with a +specific return value of the function), otherwise it becomes undefined. This +checker changes the analysis state to contain such information. This data is +used by other checkers, for example :ref:`alpha-unix-Errno`. + +**Limitations** + +The checker can not always provide notes about the values of the arguments. +Without this information it is hard to confirm if the constraint is indeed +violated. The argument values are shown if they are known constants or the value +is determined by previous (not too complicated) assumptions. + +The checker can produce false positives in cases such as if the program has +invariants not known to the analyzer engine or the bug report path contains +calls to unknown functions. In these cases the analyzer fails to detect the real +range of the argument. + +**Parameters** + +The checker models functions (and emits diagnostics) from the C standard by +default. The ``ModelPOSIX`` option enables modeling (and emit diagnostics) of +additional functions that are defined in the POSIX standard. This option is +disabled by default. + .. _osx-checkers: osx @@ -2677,101 +2770,7 @@ For a more detailed description of configuration options, please see the file. This causes potential true positive findings to be lost. alpha.unix -^^^^^^^^^^^ - -.. 
_alpha-unix-StdCLibraryFunctions: - -alpha.unix.StdCLibraryFunctions (C) -""""""""""""""""""""""""""""""""""" -Check for calls of standard library functions that violate predefined argument -constraints. For example, it is stated in the C standard that for the ``int -isalnum(int ch)`` function the behavior is undefined if the value of ``ch`` is -not representable as unsigned char and is not equal to ``EOF``. - -.. code-block:: c - - #define EOF -1 - void test_alnum_concrete(int v) { - int ret = isalnum(256); // \ - // warning: Function argument outside of allowed range - (void)ret; - } - - void buffer_size_violation(FILE *file) { - enum { BUFFER_SIZE = 1024 }; - wchar_t wbuf[BUFFER_SIZE]; - - const size_t size = sizeof(*wbuf); // 4 - const size_t nitems = sizeof(wbuf); // 4096 - - // Below we receive a warning because the 3rd parameter should be the - // number of elements to read, not the size in bytes. This case is a known - // vulnerability described by the ARR38-C SEI-CERT rule. - fread(wbuf, size, nitems, file); - } - -You can think of this checker as defining restrictions (pre- and postconditions) -on standard library functions. Preconditions are checked, and when they are -violated, a warning is emitted. Post conditions are added to the analysis, e.g. -that the return value must be no greater than 255. - -For example if an argument to a function must be in between 0 and 255, but the -value of the argument is unknown, the analyzer will conservatively assume that -it is in this interval. Similarly, if a function mustn't be called with a null -pointer and the null value of the argument can not be proven, the analyzer will -assume that it is non-null. - -These are the possible checks on the values passed as function arguments: - - The argument has an allowed range (or multiple ranges) of values. The checker - can detect if a passed value is outside of the allowed range and show the - actual and allowed values. 
- - The argument has pointer type and is not allowed to be null pointer. Many - (but not all) standard functions can produce undefined behavior if a null - pointer is passed, these cases can be detected by the checker. - - The argument is a pointer to a memory block and the minimal size of this - buffer is determined by another argument to the function, or by - multiplication of two arguments (like at function ``fread``), or is a fixed - value (for example ``asctime_r`` requires at least a buffer of size 26). The - checker can detect if the buffer size is too small and in optimal case show - the size of the buffer and the values of the corresponding arguments. - -.. code-block:: c - - int test_alnum_symbolic(int x) { - int ret = isalnum(x); - // after the call, ret is assumed to be in the range [-1, 255] - - if (ret > 255) // impossible (infeasible branch) - if (x == 0) - return ret / x; // division by zero is not reported - return ret; - } - -Additionally to the argument and return value conditions, this checker also adds -state of the value ``errno`` if applicable to the analysis. Many system -functions set the ``errno`` value only if an error occurs (together with a -specific return value of the function), otherwise it becomes undefined. This -checker changes the analysis state to contain such information. This data is -used by other checkers, for example :ref:`alpha-unix-Errno`. - -**Limitations** - -The checker can not always provide notes about the values of the arguments. -Without this information it is hard to confirm if the constraint is indeed -violated. The argument values are shown if they are known constants or the value -is determined by previous (not too complicated) assumptions. - -The checker can produce false positives in cases such as if the program has -invariants not known to the analyzer engine or the bug report path contains -calls to unknown functions. In these cases the analyzer fails to detect the real -range of the argument. 
- -**Parameters** - -The checker models functions (and emits diagnostics) from the C standard by -default. The ``ModelPOSIX`` option enables modeling (and emit diagnostics) of -additional functions that are defined in the POSIX standard. This option is -disabled by default. +^^^^^^^^^^ .. _alpha-unix-BlockInCriticalSection: @@ -2840,9 +2839,9 @@ pages of the functions and in the `POSIX standard , Dependencies<[DynamicMemoryModeling]>, Documentation; +def StdCLibraryFunctionsChecker : Checker<"StdCLibraryFunctions">, + HelpText<"Check for invalid arguments of C standard library functions, " + "and apply relations between arguments and return value">, + CheckerOptions<[ + CmdLineOption, + CmdLineOption + ]>, + WeakDependencies<[CallAndMessageChecker, NonNullParamChecker]>, + Documentation; + def VforkChecker : Checker<"Vfork">, HelpText<"Check for proper usage of vfork">, Documentation; @@ -574,27 +595,6 @@ def BlockInCriticalSectionChecker : Checker<"BlockInCriticalSection">, HelpText<"Check for calls to blocking functions inside a critical section">, Documentation; -def StdCLibraryFunctionsChecker : Checker<"StdCLibraryFunctions">, - HelpText<"Check for invalid arguments of C standard library functions, " - "and apply relations between arguments and return value">, - CheckerOptions<[ - CmdLineOption, - CmdLineOption - ]>, - WeakDependencies<[CallAndMessageChecker, NonNullParamChecker, StreamChecker]>, - Documentation; - } // end "alpha.unix" //===----------------------------------------------------------------------===// @@ -1627,6 +1627,7 @@ def DebugIteratorModeling : Checker<"DebugIteratorModeling">, def StdCLibraryFunctionsTesterChecker : Checker<"StdCLibraryFunctionsTester">, HelpText<"Add test functions to the summary map, so testing of individual " "summary constituents becomes possible.">, + WeakDependencies<[StdCLibraryFunctionsChecker]>, Documentation; } // end "debug" diff --git a/clang/test/Analysis/PR49642.c b/clang/test/Analysis/PR49642.c index 
c21050fd4a5c8..78bbde79d8300 100644 --- a/clang/test/Analysis/PR49642.c +++ b/clang/test/Analysis/PR49642.c @@ -1,6 +1,6 @@ // RUN: %clang_analyze_cc1 -Wno-implicit-function-declaration -Wno-implicit-int -w -verify %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions +// RUN: -analyzer-checker=unix.StdCLibraryFunctions // expected-no-diagnostics diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index d86ca5d19219c..794ef8b9cc086 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -13,8 +13,6 @@ // CHECK-NEXT: alpha.security.MmapWriteExec:MmapProtRead = 0x01 // CHECK-NEXT: alpha.security.taint.TaintPropagation:Config = "" // CHECK-NEXT: alpha.unix.Errno:AllowErrnoReadOutsideConditionExpressions = true -// CHECK-NEXT: alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries = false -// CHECK-NEXT: alpha.unix.StdCLibraryFunctions:ModelPOSIX = false // CHECK-NEXT: apply-fixits = false // CHECK-NEXT: assume-controlled-environment = false // CHECK-NEXT: avoid-suppressing-null-argument-paths = false @@ -129,6 +127,8 @@ // CHECK-NEXT: track-conditions-debug = false // CHECK-NEXT: unix.DynamicMemoryModeling:AddNoOwnershipChangeNotes = true // CHECK-NEXT: unix.DynamicMemoryModeling:Optimistic = false +// CHECK-NEXT: unix.StdCLibraryFunctions:DisplayLoadedSummaries = false +// CHECK-NEXT: unix.StdCLibraryFunctions:ModelPOSIX = false // CHECK-NEXT: unroll-loops = false // CHECK-NEXT: verbose-report-filename = false // CHECK-NEXT: widen-loops = false diff --git a/clang/test/Analysis/analyzer-enabled-checkers.c b/clang/test/Analysis/analyzer-enabled-checkers.c index ed8334b9e2db0..cf69a6b04c979 100644 --- a/clang/test/Analysis/analyzer-enabled-checkers.c +++ b/clang/test/Analysis/analyzer-enabled-checkers.c @@ -47,6 +47,7 @@ // CHECK-NEXT: unix.Malloc // CHECK-NEXT: unix.MallocSizeof // CHECK-NEXT: unix.MismatchedDeallocator +// CHECK-NEXT: 
unix.StdCLibraryFunctions // CHECK-NEXT: unix.Vfork // CHECK-NEXT: unix.cstring.BadSizeArg // CHECK-NEXT: unix.cstring.NullArg diff --git a/clang/test/Analysis/conversion.c b/clang/test/Analysis/conversion.c index 0d2e005550b16..cafe9c37c2402 100644 --- a/clang/test/Analysis/conversion.c +++ b/clang/test/Analysis/conversion.c @@ -1,6 +1,6 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -Wno-conversion -Wno-tautological-constant-compare \ -// RUN: -analyzer-checker=core,apiModeling,alpha.unix.StdCLibraryFunctions,alpha.core.Conversion \ +// RUN: -analyzer-checker=core,apiModeling,unix.StdCLibraryFunctions,alpha.core.Conversion \ // RUN: -verify unsigned char U8; @@ -187,7 +187,7 @@ char dontwarn10(long long x) { } -// C library functions, handled via alpha.unix.StdCLibraryFunctions +// C library functions, handled via unix.StdCLibraryFunctions int isascii(int c); void libraryFunction1(void) { diff --git a/clang/test/Analysis/errno-stdlibraryfunctions-notes.c b/clang/test/Analysis/errno-stdlibraryfunctions-notes.c index 991384cc373ef..c3fac58c46b37 100644 --- a/clang/test/Analysis/errno-stdlibraryfunctions-notes.c +++ b/clang/test/Analysis/errno-stdlibraryfunctions-notes.c @@ -1,10 +1,10 @@ // RUN: %clang_analyze_cc1 -verify -analyzer-output text %s \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=debug.ExprInspection \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=apiModeling.Errno \ // RUN: -analyzer-checker=alpha.unix.Errno \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true #include "Inputs/errno_var.h" diff --git a/clang/test/Analysis/errno-stdlibraryfunctions.c b/clang/test/Analysis/errno-stdlibraryfunctions.c index a3b42f4425c35..fce5e5d6b0a47 100644 --- a/clang/test/Analysis/errno-stdlibraryfunctions.c +++ b/clang/test/Analysis/errno-stdlibraryfunctions.c @@ -1,10 
+1,10 @@ // RUN: %clang_analyze_cc1 -verify %s \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=debug.ExprInspection \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=apiModeling.Errno \ // RUN: -analyzer-checker=alpha.unix.Errno \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true #include "Inputs/errno_var.h" diff --git a/clang/test/Analysis/std-c-library-functions-POSIX-lookup.c b/clang/test/Analysis/std-c-library-functions-POSIX-lookup.c index 22f752fee0ece..5338fa092d9d2 100644 --- a/clang/test/Analysis/std-c-library-functions-POSIX-lookup.c +++ b/clang/test/Analysis/std-c-library-functions-POSIX-lookup.c @@ -1,8 +1,8 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s --allow-empty diff --git a/clang/test/Analysis/std-c-library-functions-POSIX-socket-sockaddr.cpp b/clang/test/Analysis/std-c-library-functions-POSIX-socket-sockaddr.cpp index c835b80960c39..8aa370287562a 100644 --- a/clang/test/Analysis/std-c-library-functions-POSIX-socket-sockaddr.cpp +++ b/clang/test/Analysis/std-c-library-functions-POSIX-socket-sockaddr.cpp @@ -1,8 +1,8 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: 
-analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s diff --git a/clang/test/Analysis/std-c-library-functions-POSIX.c b/clang/test/Analysis/std-c-library-functions-POSIX.c index 870af4f86c27f..84ce0f21e569f 100644 --- a/clang/test/Analysis/std-c-library-functions-POSIX.c +++ b/clang/test/Analysis/std-c-library-functions-POSIX.c @@ -1,17 +1,17 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple i686-unknown-linux -verify // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: 
-analyzer-config eagerly-assume=false \ // RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints-note-tags.cpp b/clang/test/Analysis/std-c-library-functions-arg-constraints-note-tags.cpp index 573b0076a0e73..7eea4512898e6 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-constraints-note-tags.cpp +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints-note-tags.cpp @@ -1,8 +1,8 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple i686-unknown-linux \ diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints-notes.cpp b/clang/test/Analysis/std-c-library-functions-arg-constraints-notes.cpp index 781b96d53103a..f30f977bcd1dd 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-constraints-notes.cpp +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints-notes.cpp @@ -1,8 +1,8 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple i686-unknown-linux \ diff --git 
a/clang/test/Analysis/std-c-library-functions-arg-constraints-tracking-notes.c b/clang/test/Analysis/std-c-library-functions-arg-constraints-tracking-notes.c index d497b87c48473..0a66e49be9b2a 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-constraints-tracking-notes.c +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints-tracking-notes.c @@ -1,7 +1,7 @@ // Check the bugpath related to the reports. // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -triple x86_64-unknown-linux-gnu \ diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints.c b/clang/test/Analysis/std-c-library-functions-arg-constraints.c index 062faccfb63cd..0b817dda98c72 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-constraints.c +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints.c @@ -1,8 +1,8 @@ // Check the basic reporting/warning and the application of constraints. // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ // RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -triple x86_64-unknown-linux-gnu \ @@ -11,8 +11,8 @@ // Check the bugpath related to the reports. 
// RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ // RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -triple x86_64-unknown-linux-gnu \ diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints.cpp b/clang/test/Analysis/std-c-library-functions-arg-constraints.cpp index 80a680eb55842..037b5d9ad9520 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-constraints.cpp +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints.cpp @@ -1,6 +1,6 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ diff --git a/clang/test/Analysis/std-c-library-functions-arg-cstring-dependency.c b/clang/test/Analysis/std-c-library-functions-arg-cstring-dependency.c index 5ebb07e524753..2fa15c00cb600 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-cstring-dependency.c +++ b/clang/test/Analysis/std-c-library-functions-arg-cstring-dependency.c @@ -5,9 +5,9 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=unix.cstring.NullArg \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ // RUN: -triple x86_64-unknown-linux-gnu \ // RUN: -verify diff --git 
a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c index 40fb4a734fe77..7f5bfba6ff568 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c +++ b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c @@ -3,9 +3,9 @@ // RUN: %clang --analyze %s --target=x86_64-pc-linux-gnu \ // RUN: -Xclang -analyzer-checker=core \ -// RUN: -Xclang -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -Xclang -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -Xclang -analyzer-config \ -// RUN: -Xclang alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -Xclang unix.StdCLibraryFunctions:ModelPOSIX=true \ // RUN: -Xclang -analyzer-checker=alpha.unix.Stream \ // RUN: -Xclang -analyzer-list-enabled-checkers \ // RUN: -Xclang -analyzer-display-progress \ @@ -14,17 +14,16 @@ // CHECK: OVERVIEW: Clang Static Analyzer Enabled Checkers List // CHECK-EMPTY: -// CHECK-NEXT: core.CallAndMessageModeling -// CHECK-NEXT: core.CallAndMessage // CHECK-NEXT: core.NonNullParamChecker // CHECK-NEXT: alpha.unix.Stream -// CHECK-NEXT: alpha.unix.StdCLibraryFunctions // CHECK-NEXT: apiModeling.Errno // CHECK-NEXT: apiModeling.TrustNonnull // CHECK-NEXT: apiModeling.TrustReturnsNonnull // CHECK-NEXT: apiModeling.llvm.CastValue // CHECK-NEXT: apiModeling.llvm.ReturnValue // CHECK-NEXT: core.BitwiseShift +// CHECK-NEXT: core.CallAndMessageModeling +// CHECK-NEXT: core.CallAndMessage // CHECK-NEXT: core.DivideZero // CHECK-NEXT: core.DynamicTypePropagation // CHECK-NEXT: core.NonnilStringConstants @@ -57,6 +56,7 @@ // CHECK-NEXT: unix.Malloc // CHECK-NEXT: unix.MallocSizeof // CHECK-NEXT: unix.MismatchedDeallocator +// CHECK-NEXT: unix.StdCLibraryFunctions // CHECK-NEXT: unix.Vfork // CHECK-NEXT: unix.cstring.BadSizeArg // CHECK-NEXT: unix.cstring.NullArg diff --git a/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c 
b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c index 87f07a2d90a14..5df5a770015b5 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c +++ b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c @@ -4,8 +4,8 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.unix.Stream \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ // RUN: -triple x86_64-unknown-linux-gnu \ // RUN: -verify @@ -14,9 +14,9 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -triple x86_64-unknown-linux 2>&1 | FileCheck %s // CHECK: Loaded summary for: int isalnum(int) diff --git a/clang/test/Analysis/std-c-library-functions-eof.c b/clang/test/Analysis/std-c-library-functions-eof.c index 0050bf2d9bee2..0fadf73436ac7 100644 --- a/clang/test/Analysis/std-c-library-functions-eof.c +++ b/clang/test/Analysis/std-c-library-functions-eof.c @@ -1,8 +1,8 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s -// RUN: %clang_analyze_cc1 -triple i686-unknown-linux -analyzer-checker=core,alpha.unix.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s -// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux 
-analyzer-checker=core,alpha.unix.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s -// RUN: %clang_analyze_cc1 -triple armv7-a15-linux -analyzer-checker=core,alpha.unix.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s -// RUN: %clang_analyze_cc1 -triple thumbv7-a15-linux -analyzer-checker=core,alpha.unix.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s +// RUN: %clang_analyze_cc1 -triple i686-unknown-linux -analyzer-checker=core,unix.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s +// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux -analyzer-checker=core,unix.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s +// RUN: %clang_analyze_cc1 -triple armv7-a15-linux -analyzer-checker=core,unix.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s +// RUN: %clang_analyze_cc1 -triple thumbv7-a15-linux -analyzer-checker=core,unix.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s void clang_analyzer_eval(int); diff --git a/clang/test/Analysis/std-c-library-functions-inlined.c b/clang/test/Analysis/std-c-library-functions-inlined.c index e40f5204f6321..5277a6efbe079 100644 --- a/clang/test/Analysis/std-c-library-functions-inlined.c +++ b/clang/test/Analysis/std-c-library-functions-inlined.c @@ -1,8 +1,8 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.unix.StdCLibraryFunctions -verify %s -// RUN: %clang_analyze_cc1 -triple i686-unknown-linux -analyzer-checker=alpha.unix.StdCLibraryFunctions -verify %s -// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux -analyzer-checker=alpha.unix.StdCLibraryFunctions -verify %s -// RUN: 
%clang_analyze_cc1 -triple armv7-a15-linux -analyzer-checker=alpha.unix.StdCLibraryFunctions -verify %s -// RUN: %clang_analyze_cc1 -triple thumbv7-a15-linux -analyzer-checker=alpha.unix.StdCLibraryFunctions -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=unix.StdCLibraryFunctions -verify %s +// RUN: %clang_analyze_cc1 -triple i686-unknown-linux -analyzer-checker=unix.StdCLibraryFunctions -verify %s +// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux -analyzer-checker=unix.StdCLibraryFunctions -verify %s +// RUN: %clang_analyze_cc1 -triple armv7-a15-linux -analyzer-checker=unix.StdCLibraryFunctions -verify %s +// RUN: %clang_analyze_cc1 -triple thumbv7-a15-linux -analyzer-checker=unix.StdCLibraryFunctions -verify %s // This test tests crashes that occur when standard functions are available // for inlining. diff --git a/clang/test/Analysis/std-c-library-functions-lookup.c b/clang/test/Analysis/std-c-library-functions-lookup.c index 7032dca1b8baa..e47d9bddda91b 100644 --- a/clang/test/Analysis/std-c-library-functions-lookup.c +++ b/clang/test/Analysis/std-c-library-functions-lookup.c @@ -1,7 +1,7 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s diff --git a/clang/test/Analysis/std-c-library-functions-lookup.cpp b/clang/test/Analysis/std-c-library-functions-lookup.cpp index 22778b2fdefbd..9480b88bec78d 100644 --- a/clang/test/Analysis/std-c-library-functions-lookup.cpp +++ b/clang/test/Analysis/std-c-library-functions-lookup.cpp @@ -1,7 +1,7 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: 
-analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s diff --git a/clang/test/Analysis/std-c-library-functions-path-notes.c b/clang/test/Analysis/std-c-library-functions-path-notes.c index 6b5d1d7bd4eb9..d0957483c1391 100644 --- a/clang/test/Analysis/std-c-library-functions-path-notes.c +++ b/clang/test/Analysis/std-c-library-functions-path-notes.c @@ -1,6 +1,6 @@ // RUN: %clang_analyze_cc1 -verify %s \ -// RUN: -analyzer-checker=core,alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=core,unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ // RUN: -analyzer-output=text #include "Inputs/std-c-library-functions-POSIX.h" diff --git a/clang/test/Analysis/std-c-library-functions-restrict.c b/clang/test/Analysis/std-c-library-functions-restrict.c index 6260f851cdfa5..27e223c6e5b2f 100644 --- a/clang/test/Analysis/std-c-library-functions-restrict.c +++ b/clang/test/Analysis/std-c-library-functions-restrict.c @@ -1,8 +1,8 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s // The signatures for these functions are the same and they 
specify their diff --git a/clang/test/Analysis/std-c-library-functions-restrict.cpp b/clang/test/Analysis/std-c-library-functions-restrict.cpp index e431b14b19525..8954ab48862ae 100644 --- a/clang/test/Analysis/std-c-library-functions-restrict.cpp +++ b/clang/test/Analysis/std-c-library-functions-restrict.cpp @@ -1,8 +1,8 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s // The signatures for these functions are the same and they specify their diff --git a/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c b/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c index 4df46207da70d..281fbaaffe703 100644 --- a/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c +++ b/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c @@ -8,8 +8,8 @@ // Check the case when only the StdLibraryFunctionsChecker is enabled. // RUN: %clang_analyze_cc1 %s \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple x86_64-unknown-linux \ @@ -19,8 +19,8 @@ // StdLibraryFunctionsChecker are enabled. 
// RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core,alpha.unix.Stream \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple x86_64-unknown-linux \ diff --git a/clang/test/Analysis/std-c-library-functions.c b/clang/test/Analysis/std-c-library-functions.c index 392784722d385..b7eb6b284460e 100644 --- a/clang/test/Analysis/std-c-library-functions.c +++ b/clang/test/Analysis/std-c-library-functions.c @@ -1,6 +1,6 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple i686-unknown-linux \ @@ -8,7 +8,7 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple x86_64-unknown-linux \ @@ -16,7 +16,7 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple armv7-a15-linux \ @@ -24,7 +24,7 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-checker=debug.ExprInspection \ 
// RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple thumbv7-a15-linux \ @@ -32,8 +32,8 @@ // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s diff --git a/clang/test/Analysis/std-c-library-functions.cpp b/clang/test/Analysis/std-c-library-functions.cpp index 2da01d6351997..00b341af5f922 100644 --- a/clang/test/Analysis/std-c-library-functions.cpp +++ b/clang/test/Analysis/std-c-library-functions.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux -analyzer-checker=alpha.unix.StdCLibraryFunctions,debug.ExprInspection -verify %s +// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux -analyzer-checker=unix.StdCLibraryFunctions,debug.ExprInspection -verify %s // Test that we don't model functions with broken prototypes. // Because they probably work differently as well. 
diff --git a/clang/test/Analysis/std-c-library-posix-crash.c b/clang/test/Analysis/std-c-library-posix-crash.c index 66e7bf4656b34..68ad771aa997d 100644 --- a/clang/test/Analysis/std-c-library-posix-crash.c +++ b/clang/test/Analysis/std-c-library-posix-crash.c @@ -1,6 +1,6 @@ // RUN: %clang_analyze_cc1 \ -// RUN: -analyzer-checker=core,alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=core,unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ // RUN: -verify %s // // expected-no-diagnostics diff --git a/clang/test/Analysis/stream-errno-note.c b/clang/test/Analysis/stream-errno-note.c index 4ab215a64539d..32d9d4fd9689d 100644 --- a/clang/test/Analysis/stream-errno-note.c +++ b/clang/test/Analysis/stream-errno-note.c @@ -1,8 +1,8 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.unix.Stream \ // RUN: -analyzer-checker=alpha.unix.Errno \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ // RUN: -analyzer-output text -verify %s #include "Inputs/system-header-simulator.h" diff --git a/clang/test/Analysis/stream-errno.c b/clang/test/Analysis/stream-errno.c index d8c0c8223ad20..cf4e2e3d781d9 100644 --- a/clang/test/Analysis/stream-errno.c +++ b/clang/test/Analysis/stream-errno.c @@ -1,5 +1,5 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,alpha.unix.Errno,alpha.unix.StdCLibraryFunctions,debug.ExprInspection \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,alpha.unix.Errno,unix.StdCLibraryFunctions,debug.ExprInspection \ +// RUN: -analyzer-config 
unix.StdCLibraryFunctions:ModelPOSIX=true -verify %s #include "Inputs/system-header-simulator.h" #include "Inputs/errno_func.h" diff --git a/clang/test/Analysis/stream-noopen.c b/clang/test/Analysis/stream-noopen.c index 03784603d9fcc..cbeac276fdee2 100644 --- a/clang/test/Analysis/stream-noopen.c +++ b/clang/test/Analysis/stream-noopen.c @@ -2,16 +2,16 @@ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.unix.Errno \ // RUN: -analyzer-checker=alpha.unix.Stream \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ // RUN: -analyzer-checker=debug.ExprInspection // enable only StdCLibraryFunctions checker // RUN: %clang_analyze_cc1 -verify %s \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.unix.Errno \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ // RUN: -analyzer-checker=debug.ExprInspection #include "Inputs/system-header-simulator.h" diff --git a/clang/test/Analysis/stream-note.c b/clang/test/Analysis/stream-note.c index 257245754dadd..b9fdc16b19e55 100644 --- a/clang/test/Analysis/stream-note.c +++ b/clang/test/Analysis/stream-note.c @@ -1,7 +1,7 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream -analyzer-output text \ // RUN: -verify %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,alpha.unix.StdCLibraryFunctions -analyzer-output text \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true -verify=expected,stdargs %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,unix.StdCLibraryFunctions -analyzer-output text \ +// RUN: 
-analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=expected,stdargs %s #include "Inputs/system-header-simulator.h" diff --git a/clang/test/Analysis/stream-stdlibraryfunctionargs.c b/clang/test/Analysis/stream-stdlibraryfunctionargs.c index a14befde51038..938901ec08829 100644 --- a/clang/test/Analysis/stream-stdlibraryfunctionargs.c +++ b/clang/test/Analysis/stream-stdlibraryfunctionargs.c @@ -1,11 +1,11 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,alpha.unix.StdCLibraryFunctions,debug.ExprInspection \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stream,any %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,unix.StdCLibraryFunctions,debug.ExprInspection \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stream,any %s // RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stream,any %s +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stream,any %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.StdCLibraryFunctions,debug.ExprInspection \ -// RUN: -analyzer-config alpha.unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stdfunc,any %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.StdCLibraryFunctions,debug.ExprInspection \ +// RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stdfunc,any %s #include "Inputs/system-header-simulator.h" diff --git a/clang/test/Analysis/weak-dependencies.c b/clang/test/Analysis/weak-dependencies.c index 9946af8f4dfae..9d4b7b6defb3c 100644 --- a/clang/test/Analysis/weak-dependencies.c +++ b/clang/test/Analysis/weak-dependencies.c @@ -1,5 +1,5 @@ // RUN: %clang_analyze_cc1 %s -verify \ -// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctions \ +// RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: 
-analyzer-checker=core typedef __typeof(sizeof(int)) size_t; From 4acb96c99f3b9c414f403f6e1ab2b317851abf0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Pettersson?= Date: Mon, 16 Oct 2023 14:53:53 +0200 Subject: [PATCH 220/720] [SelectionDAG] Tidy up around endianness and isConstantSplat (#68212) The BuildVectorSDNode::isConstantSplat function could depend on endianness, and it takes a bool argument that can be used to indicate if big or little endian should be considered when internally casting from a vector to a scalar. However, that argument is default set to false (= little endian). And in many situations, even in target generic code such as DAGCombiner, the endianness isn't specified when using the function. The intent with this patch is to highlight that endianness doesn't matter, depending on the context in which the function is used. In DAGCombiner the code is slightly refactored. Back in the days when the code was written it wasn't possible to request a MinSplatBits size when calling isConstantSplat. Instead the code re-expanded the found SplatValue to match with the EltBitWidth. Now we can just provide EltBitWidth as MinSplatBits and remove the logic for doing the re-expand. While being at it, tidying up around isConstantSplat, this patch also adds an explicit check in BuildVectorSDNode::isConstantSplat to break out from the loop if trying to split an on VecWidth into two halves. Haven't been able to prove that there could be miscompiles involved if not doing so. There are lit tests that trigger that scenario, although I think they happen to later discard the returned SplatValue for other reasons. 
--- llvm/docs/LangRef.rst | 10 ++--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 37 +++++++++---------- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 17 ++++++++- .../WebAssembly/WebAssemblyISelLowering.cpp | 2 + 4 files changed, 40 insertions(+), 26 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 35123474381e7..ee893d8e384b6 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3888,7 +3888,7 @@ integer to memory. A bitcast from a vector type to a scalar integer type will see the elements being packed together (without padding). The order in which elements are -inserted in the integer depends on endianess. For little endian element zero +inserted in the integer depends on endianness. For little endian element zero is put in the least significant bits of the integer, and for big endian element zero is put in the most significant bits. @@ -11677,7 +11677,7 @@ To convert pointers to other types, use the :ref:`inttoptr ` or :ref:`ptrtoint ` instructions first. There is a caveat for bitcasts involving vector types in relation to -endianess. For example ``bitcast <2 x i8> to i16`` puts element zero +endianness. For example ``bitcast <2 x i8> to i16`` puts element zero of the vector in the least significant bits of the i16 for little-endian while element zero ends up in the most significant bits for big-endian. @@ -11686,9 +11686,9 @@ Example: .. code-block:: text - %X = bitcast i8 255 to i8 ; yields i8 :-1 - %Y = bitcast i32* %x to i16* ; yields i16*:%x - %Z = bitcast <2 x i32> %V to i64; ; yields i64: %V (depends on endianess) + %X = bitcast i8 255 to i8 ; yields i8 :-1 + %Y = bitcast i32* %x to i16* ; yields i16*:%x + %Z = bitcast <2 x i32> %V to i64; ; yields i64: %V (depends on endianness) %Z = bitcast <2 x i32*> %V to <2 x i64*> ; yields <2 x i64*> .. 
_i_addrspacecast: diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 73438113651f5..20ad4c766a1a3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7076,12 +7076,23 @@ SDValue DAGCombiner::visitAND(SDNode *N) { N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast(N1)) { + unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits(); APInt SplatValue, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; - bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, - SplatBitSize, HasAnyUndefs); - if (IsSplat) { + // Endianness should not matter here. Code below makes sure that we only + // use the result if the SplatBitSize is a multiple of the vector element + // size. And after that we AND all element sized parts of the splat + // together. So the end result should be the same regardless of in which + // order we do those operations. + const bool IsBigEndian = false; + bool IsSplat = + Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, EltBitWidth, IsBigEndian); + + // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a + // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. + if (IsSplat && (SplatBitSize % EltBitWidth) == 0) { // Undef bits can contribute to a possible optimisation if set, so // set them. SplatValue |= SplatUndef; @@ -7090,23 +7101,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // the first vector value and FF for the rest, repeating. We need a mask // that will apply equally to all members of the vector, so AND all the // lanes of the constant together. 
- unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits(); - - // If the splat value has been compressed to a bitlength lower - // than the size of the vector lane, we need to re-expand it to - // the lane size. - if (EltBitWidth > SplatBitSize) - for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth); - SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2) - SplatValue |= SplatValue.shl(SplatBitSize); - - // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a - // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. - if ((SplatBitSize % EltBitWidth) == 0) { - Constant = APInt::getAllOnes(EltBitWidth); - for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i) - Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth); - } + Constant = APInt::getAllOnes(EltBitWidth); + for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i) + Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e831316efff52..3f06d0bd4eaa1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -161,8 +161,13 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { unsigned SplatBitSize; bool HasUndefs; unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + // Endianness does not matter here. We are checking for a splat given the + // element size of the vector, and if we find such a splat for little endian + // layout, then that should be valid also for big endian (as the full vector + // size is known to be a multiple of the element size). 
+ const bool IsBigEndian = false; return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs, - EltSize) && + EltSize, IsBigEndian) && EltSize == SplatBitSize; } @@ -12357,6 +12362,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, // FIXME: This does not work for vectors with elements less than 8 bits. while (VecWidth > 8) { + // If we can't split in half, stop here. + if (VecWidth & 1) + break; + unsigned HalfSize = VecWidth / 2; APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize); APInt LowValue = SplatValue.extractBits(HalfSize, 0); @@ -12374,6 +12383,12 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, VecWidth = HalfSize; } + // FIXME: The loop above only tries to split in halves. But if the input + // vector for example is <3 x i16> it wouldn't be able to detect a + // SplatBitSize of 16. No idea if that is a design flaw currently limiting + // optimizations. I guess that back in the days when this helper was created + // vectors normally was power-of-2 sized. + SplatBitSize = VecWidth; return true; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 61cfcdc914cdb..70629b2a50a98 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -2576,6 +2576,8 @@ performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { APInt SplatValue, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; + // Endianness doesn't matter in this context because we are looking for + // an all-zero value. 
return Splat && Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs) && From 7ac516a119a36a0f26c0d617fe67b5291eb2cd61 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Mon, 16 Oct 2023 13:59:49 +0100 Subject: [PATCH 221/720] [amdgpu] Disable openmp test that is blocking CI after changing hardware, need to diagnose memory fault --- openmp/libomptarget/test/offloading/target_critical_region.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/openmp/libomptarget/test/offloading/target_critical_region.cpp b/openmp/libomptarget/test/offloading/target_critical_region.cpp index 533d290b4d32a..9a741bef6c591 100644 --- a/openmp/libomptarget/test/offloading/target_critical_region.cpp +++ b/openmp/libomptarget/test/offloading/target_critical_region.cpp @@ -6,6 +6,7 @@ // UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: amdgcn-amd-amdhsa #include #include From f41ec27f7eba34548a280a4a4d7de2ef32837210 Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Mon, 16 Oct 2023 12:22:59 +0000 Subject: [PATCH 222/720] [Flang][OpenMP] Port atomic read, write tests to HLFIR flow These are copies of tests from flang/test/Lower/OpenMP/FIR --- flang/test/Lower/OpenMP/atomic-read.f90 | 89 ++++++++++++++++++++++++ flang/test/Lower/OpenMP/atomic-write.f90 | 73 +++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 flang/test/Lower/OpenMP/atomic-read.f90 create mode 100644 flang/test/Lower/OpenMP/atomic-write.f90 diff --git a/flang/test/Lower/OpenMP/atomic-read.f90 b/flang/test/Lower/OpenMP/atomic-read.f90 new file mode 100644 index 0000000000000..97a3777bd3dca --- /dev/null +++ b/flang/test/Lower/OpenMP/atomic-read.f90 @@ -0,0 +1,89 @@ +! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s + +! 
This test checks the lowering of atomic read + +!CHECK: func @_QQmain() attributes {fir.bindc_name = "ompatomic"} { +!CHECK: %[[A_C1:.*]] = arith.constant 1 : index +!CHECK: %[[A_REF:.*]] = fir.alloca !fir.char<1> {bindc_name = "a", uniq_name = "_QFEa"} +!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_REF]] typeparams %[[A_C1]] {uniq_name = "_QFEa"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[B_C1:.*]] = arith.constant 1 : index +!CHECK: %[[B_REF:.*]] = fir.alloca !fir.char<1> {bindc_name = "b", uniq_name = "_QFEb"} +!CHECK: %[[B_DECL:.*]]:2 = hlfir.declare %[[B_REF]] typeparams %[[B_C1]] {uniq_name = "_QFEb"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[C_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "c", uniq_name = "_QFEc"} +!CHECK: %[[C_DECL:.*]]:2 = hlfir.declare %[[C_REF]] {uniq_name = "_QFEc"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[D_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "d", uniq_name = "_QFEd"} +!CHECK: %[[D_DECL:.*]]:2 = hlfir.declare %[[D_REF]] {uniq_name = "_QFEd"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[E_C8:.*]] = arith.constant 8 : index +!CHECK: %[[E_REF:.*]] = fir.alloca !fir.char<1,8> {bindc_name = "e", uniq_name = "_QFEe"} +!CHECK: %[[E_DECL:.*]]:2 = hlfir.declare %[[E_REF]] typeparams %[[E_C8]] {uniq_name = "_QFEe"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[F_C8:.*]] = arith.constant 8 : index +!CHECK: %[[F_REF:.*]] = fir.alloca !fir.char<1,8> {bindc_name = "f", uniq_name = "_QFEf"} +!CHECK: %[[F_DECL:.*]]:2 = hlfir.declare %[[F_REF]] typeparams %[[F_C8]] {uniq_name = "_QFEf"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[G_REF:.*]] = fir.alloca f32 {bindc_name = "g", uniq_name = "_QFEg"} +!CHECK: %[[G_DECL:.*]]:2 = hlfir.declare %[[G_REF]] {uniq_name = "_QFEg"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[H_REF:.*]] = fir.alloca f32 {bindc_name = "h", uniq_name = "_QFEh"} +!CHECK: %[[H_DECL:.*]]:2 = hlfir.declare %[[H_REF]] {uniq_name = 
"_QFEh"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} +!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[Y_REF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} +!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_REF]] {uniq_name = "_QFEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: omp.atomic.read %[[X_DECL]]#1 = %[[Y_DECL]]#1 memory_order(acquire) hint(uncontended) : !fir.ref, i32 +!CHECK: omp.atomic.read %[[A_DECL]]#1 = %[[B_DECL]]#1 memory_order(relaxed) : !fir.ref>, !fir.char<1> +!CHECK: omp.atomic.read %[[C_DECL]]#1 = %[[D_DECL]]#1 memory_order(seq_cst) hint(contended) : !fir.ref>, !fir.logical<4> +!CHECK: omp.atomic.read %[[E_DECL]]#1 = %[[F_DECL]]#1 hint(speculative) : !fir.ref>, !fir.char<1,8> +!CHECK: omp.atomic.read %[[G_DECL]]#1 = %[[H_DECL]]#1 hint(nonspeculative) : !fir.ref, f32 +!CHECK: omp.atomic.read %[[G_DECL]]#1 = %[[H_DECL]]#1 : !fir.ref, f32 + +program OmpAtomic + + use omp_lib + integer :: x, y + character :: a, b + logical :: c, d + character(8) :: e, f + real g, h + !$omp atomic acquire read hint(omp_sync_hint_uncontended) + x = y + !$omp atomic relaxed read hint(omp_sync_hint_none) + a = b + !$omp atomic read seq_cst hint(omp_sync_hint_contended) + c = d + !$omp atomic read hint(omp_sync_hint_speculative) + e = f + !$omp atomic read hint(omp_sync_hint_nonspeculative) + g = h + !$omp atomic read + g = h +end program OmpAtomic + +! Test lowering atomic read for pointer variables. +! Please notice to use %[[VAL_4]] and %[[VAL_1]] for operands of atomic +! operation, instead of %[[VAL_3]] and %[[VAL_0]]. 
+ +!CHECK-LABEL: func.func @_QPatomic_read_pointer() { +!CHECK: %[[X_REF:.*]] = fir.alloca !fir.box> {bindc_name = "x", uniq_name = "_QFatomic_read_pointerEx"} +!CHECK: fir.store %2 to %0 : !fir.ref>> +!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFatomic_read_pointerEx"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +!CHECK: %[[Y_REF:.*]] = fir.alloca !fir.box> {bindc_name = "y", uniq_name = "_QFatomic_read_pointerEy"} +!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_REF]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFatomic_read_pointerEy"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +!CHECK: %[[X_ADDR:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref>> +!CHECK: %[[X_POINTEE_ADDR:.*]] = fir.box_addr %[[X_ADDR]] : (!fir.box>) -> !fir.ptr +!CHECK: %[[Y_ADDR:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref>> +!CHECK: %[[Y_POINTEE_ADDR:.*]] = fir.box_addr %[[Y_ADDR]] : (!fir.box>) -> !fir.ptr +!CHECK: omp.atomic.read %[[Y_POINTEE_ADDR]] = %[[X_POINTEE_ADDR]] : !fir.ptr, i32 +!CHECK: %[[Y_ADDR:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref>> +!CHECK: %[[Y_POINTEE_ADDR:.*]] = fir.box_addr %[[Y_ADDR]] : (!fir.box>) -> !fir.ptr +!CHECK: %[[Y_POINTEE_VAL:.*]] = fir.load %[[Y_POINTEE_ADDR]] : !fir.ptr +!CHECK: %[[X_ADDR:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref>> +!CHECK: %[[X_POINTEE_ADDR:.*]] = fir.box_addr %[[X_ADDR]] : (!fir.box> +!CHECK: hlfir.assign %[[Y_POINTEE_VAL]] to %[[X_POINTEE_ADDR]] : i32, !fir.ptr + +subroutine atomic_read_pointer() + integer, pointer :: x, y + + !$omp atomic read + y = x + + x = y +end + diff --git a/flang/test/Lower/OpenMP/atomic-write.f90 b/flang/test/Lower/OpenMP/atomic-write.f90 new file mode 100644 index 0000000000000..119f60c1a92f5 --- /dev/null +++ b/flang/test/Lower/OpenMP/atomic-write.f90 @@ -0,0 +1,73 @@ +! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s + +! 
This test checks the lowering of atomic write + +!CHECK: func @_QQmain() attributes {fir.bindc_name = "ompatomicwrite"} { +!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} +!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[Y_REF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} +!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_REF]] {uniq_name = "_QFEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[Z_REF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFEz"} +!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z_REF]] {uniq_name = "_QFEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[C44:.*]] = arith.constant 44 : i32 +!CHECK: omp.atomic.write %[[X_DECL:.*]]#1 = %[[C44]] hint(uncontended) memory_order(seq_cst) : !fir.ref, i32 +!CHECK: %[[C7:.*]] = arith.constant 7 : i32 +!CHECK: %[[Y_VAL:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref +!CHECK: %[[SEVEN_Y_VAL:.*]] = arith.muli %[[C7]], %[[Y_VAL]] : i32 +!CHECK: omp.atomic.write %[[X_DECL]]#1 = %[[SEVEN_Y_VAL]] memory_order(relaxed) : !fir.ref, i32 +!CHECK: %[[C10:.*]] = arith.constant 10 : i32 +!CHECK: %[[X_VAL:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref +!CHECK: %[[TEN_X:.*]] = arith.muli %[[C10]], %[[X_VAL]] : i32 +!CHECK: %[[Z_VAL:.*]] = fir.load %[[Z_DECL]]#0 : !fir.ref +!CHECK: %[[C2:.*]] = arith.constant 2 : i32 +!CHECK: %[[Z_DIV_2:.*]] = arith.divsi %[[Z_VAL]], %[[C2]] : i32 +!CHECK: %172 = arith.addi %[[TEN_X]], %[[Z_DIV_2]] : i32 +!CHECK: omp.atomic.write %163#1 = %172 hint(speculative) memory_order(release) : !fir.ref, i32 + +program OmpAtomicWrite + use omp_lib + integer :: x, y, z + !$omp atomic seq_cst write hint(omp_sync_hint_uncontended) + x = 8*4 + 12 + + !$omp atomic write relaxed + x = 7 * y + + !$omp atomic write release hint(omp_sync_hint_speculative) + y = 10*x + z/2 +end program OmpAtomicWrite + +! Test lowering atomic read for pointer variables. 
+ +!CHECK-LABEL: func.func @_QPatomic_write_pointer() { +!CHECK: %[[X_REF:.*]] = fir.alloca !fir.box> {bindc_name = "x", uniq_name = "_QFatomic_write_pointerEx"} +!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFatomic_write_pointerEx"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +!CHECK: %[[C1:.*]] = arith.constant 1 : i32 +!CHECK: %[[X_ADDR_BOX:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref>> +!CHECK: %[[X_POINTEE_ADDR:.*]] = fir.box_addr %[[X_ADDR_BOX]] : (!fir.box>) -> !fir.ptr +!CHECK: omp.atomic.write %[[X_POINTEE_ADDR]] = %[[C1]] : !fir.ptr, i32 +!CHECK: %[[C2:.*]] = arith.constant 2 : i32 +!CHECK: %[[X_ADDR_BOX:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref>> +!CHECK: %[[X_POINTEE_ADDR:.*]] = fir.box_addr %[[X_ADDR_BOX]] : (!fir.box>) -> !fir.ptr +!CHECK: hlfir.assign %[[C2]] to %[[X_POINTEE_ADDR]] : i32, !fir.ptr + +subroutine atomic_write_pointer() + integer, pointer :: x + + !$omp atomic write + x = 1 + + x = 2 +end + +!CHECK-LABEL: func.func @_QPatomic_write_typed_assign +!CHECK: %[[R2_REF:.*]] = fir.alloca f32 {bindc_name = "r2", uniq_name = "_QFatomic_write_typed_assignEr2"} +!CHECK: %[[R2_DECL:.*]]:2 = hlfir.declare %[[R2_REF]] {uniq_name = "_QFatomic_write_typed_assignEr2"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32 +!CHECK: omp.atomic.write %[[R2_DECL]]#1 = %[[C0]] : !fir.ref, f32 + +subroutine atomic_write_typed_assign + real :: r2 + !$omp atomic write + r2 = 0 +end subroutine From cc3d2533cc2e4ea06981b86ede5087fbf801e789 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Mon, 16 Oct 2023 16:18:27 +0200 Subject: [PATCH 223/720] [AMDGPU] Add i1 mul patterns (#67291) i1 muls can sometimes happen after SCEV. They resulted in ISel failures because we were missing the patterns for them. 
Solves SWDEV-423354 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 + llvm/test/CodeGen/AMDGPU/mul.ll | 403 +++++++++++++++++----- 2 files changed, 328 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index cd849560feac2..9c5b166c96522 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -769,6 +769,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // extract of relevant bits. setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal); + setOperationAction(ISD::MUL, MVT::i1, Promote); + setTargetDAGCombine({ISD::ADD, ISD::UADDO_CARRY, ISD::SUB, diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll index b4e9376d82777..da7645d5011fc 100644 --- a/llvm/test/CodeGen/AMDGPU/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/mul.ll @@ -1059,6 +1059,255 @@ entry: ret void } +define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 x i32], i1 %b) nounwind { +; SI-LABEL: s_mul_i1: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dword s4, s[0:1], 0x13 +; SI-NEXT: s_load_dword s5, s[0:1], 0x1c +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mul_i32 s4, s4, s5 +; SI-NEXT: s_and_b32 s4, s4, 1 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: s_mul_i1: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dword s4, s[0:1], 0x70 +; VI-NEXT: s_load_dword s5, s[0:1], 0x4c +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mul_lo_u16_e32 v0, s5, v0 +; VI-NEXT: v_and_b32_e32 v0, 1, v0 +; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: s_mul_i1: +; GFX9: ; %bb.0: ; %entry +; 
GFX9-NEXT: s_load_dword s2, s[0:1], 0x70 +; GFX9-NEXT: s_load_dword s3, s[0:1], 0x4c +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mul_lo_u16_e32 v0, s3, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: s_mul_i1: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_clause 0x2 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x4c +; GFX10-NEXT: s_load_dword s3, s[0:1], 0x70 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; GFX10-NEXT: s_mov_b32 s7, 0x31016000 +; GFX10-NEXT: s_mov_b32 s6, -1 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mul_lo_u16 v0, s2, s3 +; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX10-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: s_mul_i1: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x4c +; GFX11-NEXT: s_load_b32 s3, s[0:1], 0x70 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mul_lo_u16 v0, s2, s3 +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-NEXT: s_mov_b32 s2, -1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; EG-LABEL: s_mul_i1: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @10, KC0[], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 12, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_8 T1.X, T0.X, 72, #3 +; EG-NEXT: VTX_READ_8 T0.X, T0.X, 108, #3 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, 0.0, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: AND_INT T0.W, 
KC0[2].Y, literal.x, +; EG-NEXT: MULLO_INT * T0.X, T1.X, T0.X, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, 1, +; EG-NEXT: LSHL * T0.W, PV.W, literal.x, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +entry: + %mul = mul i1 %a, %b + store i1 %mul, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: v_mul_i1: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_mul_lo_u32 v0, v0, v1 +; SI-NEXT: v_and_b32_e32 v0, 1, v0 +; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: v_mul_i1: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_mov_b32 s10, s6 +; VI-NEXT: s_mov_b32 s11, s7 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s2 +; VI-NEXT: s_mov_b32 s9, s3 +; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_mul_lo_u16_e32 v0, v0, v1 +; VI-NEXT: v_and_b32_e32 v0, 1, v0 +; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: 
v_mul_i1: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 +; GFX9-NEXT: s_mov_b32 s10, s6 +; GFX9-NEXT: s_mov_b32 s11, s7 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s8, s2 +; GFX9-NEXT: s_mov_b32 s9, s3 +; GFX9-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; GFX9-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4 +; GFX9-NEXT: s_mov_b32 s4, s0 +; GFX9-NEXT: s_mov_b32 s5, s1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: v_mul_i1: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX10-NEXT: s_mov_b32 s6, -1 +; GFX10-NEXT: s_mov_b32 s7, 0x31016000 +; GFX10-NEXT: s_mov_b32 s10, s6 +; GFX10-NEXT: s_mov_b32 s11, s7 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_mov_b32 s8, s2 +; GFX10-NEXT: s_mov_b32 s9, s3 +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; GFX10-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4 +; GFX10-NEXT: s_mov_b32 s4, s0 +; GFX10-NEXT: s_mov_b32 s5, s1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX10-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: v_mul_i1: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX11-NEXT: s_mov_b32 s6, -1 +; GFX11-NEXT: s_mov_b32 s7, 0x31016000 +; GFX11-NEXT: s_mov_b32 s10, s6 +; GFX11-NEXT: s_mov_b32 s11, s7 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s8, s2 +; GFX11-NEXT: s_mov_b32 s9, s3 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: buffer_load_u8 v0, off, s[8:11], 0 +; GFX11-NEXT: buffer_load_u8 v1, off, s[8:11], 0 offset:4 +; GFX11-NEXT: s_mov_b32 s4, s0 +; GFX11-NEXT: s_mov_b32 s5, s1 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; 
GFX11-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11-NEXT: buffer_store_b8 v0, off, s[4:7], 0 +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; EG-LABEL: v_mul_i1: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 12, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_8 T1.X, T0.X, 4, #1 +; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: MULLO_INT * T0.X, T0.X, T1.X, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, 1, +; EG-NEXT: LSHL * T0.W, PV.W, literal.x, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +entry: + %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 + %a = load i1, ptr addrspace(1) %in + %b = load i1, ptr addrspace(1) %b_ptr + %result = mul i1 %a, %b + store i1 %result, ptr addrspace(1) %out + ret void +} + ; A standard 64-bit multiply. The expansion should be around 6 instructions. ; It would be difficult to match the expansion correctly without writing ; a really complicated list of FileCheck expressions. 
I don't want @@ -1213,7 +1462,7 @@ define amdgpu_kernel void @v_mul_i64(ptr addrspace(1) %out, ptr addrspace(1) %ap ; SI-NEXT: v_mul_hi_u32 v4, v2, v0 ; SI-NEXT: v_mul_lo_u32 v3, v3, v0 ; SI-NEXT: v_mul_lo_u32 v0, v2, v0 -; SI-NEXT: v_add_i32_e32 v1, vcc, v4, v1 +; SI-NEXT: v_add_i32_e32 v1, vcc, v1, v4 ; SI-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm @@ -1367,30 +1616,30 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_lg_u32 s2, 0 -; SI-NEXT: s_cbranch_scc0 .LBB11_2 +; SI-NEXT: s_cbranch_scc0 .LBB13_2 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_mul_i32 s6, s2, s3 ; SI-NEXT: s_mov_b64 s[4:5], 0 -; SI-NEXT: s_branch .LBB11_3 -; SI-NEXT: .LBB11_2: +; SI-NEXT: s_branch .LBB13_3 +; SI-NEXT: .LBB13_2: ; SI-NEXT: s_mov_b64 s[4:5], -1 ; SI-NEXT: ; implicit-def: $sgpr6 -; SI-NEXT: .LBB11_3: ; %Flow +; SI-NEXT: .LBB13_3: ; %Flow ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b64 vcc, vcc -; SI-NEXT: s_cbranch_vccnz .LBB11_5 +; SI-NEXT: s_cbranch_vccnz .LBB13_5 ; SI-NEXT: ; %bb.4: ; %if ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_mov_b32 s4, s2 ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 -; SI-NEXT: s_branch .LBB11_6 -; SI-NEXT: .LBB11_5: +; SI-NEXT: s_branch .LBB13_6 +; SI-NEXT: .LBB13_5: ; SI-NEXT: v_mov_b32_e32 v0, s6 -; SI-NEXT: .LBB11_6: ; %endif +; SI-NEXT: .LBB13_6: ; %endif ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -1402,18 +1651,18 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_cmp_lg_u32 s2, 0 -; VI-NEXT: s_cbranch_scc0 .LBB11_2 +; VI-NEXT: 
s_cbranch_scc0 .LBB13_2 ; VI-NEXT: ; %bb.1: ; %else ; VI-NEXT: s_mul_i32 s6, s2, s3 ; VI-NEXT: s_mov_b64 s[4:5], 0 -; VI-NEXT: s_branch .LBB11_3 -; VI-NEXT: .LBB11_2: +; VI-NEXT: s_branch .LBB13_3 +; VI-NEXT: .LBB13_2: ; VI-NEXT: s_mov_b64 s[4:5], -1 ; VI-NEXT: ; implicit-def: $sgpr6 -; VI-NEXT: .LBB11_3: ; %Flow +; VI-NEXT: .LBB13_3: ; %Flow ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: s_andn2_b64 vcc, exec, s[4:5] -; VI-NEXT: s_cbranch_vccnz .LBB11_5 +; VI-NEXT: s_cbranch_vccnz .LBB13_5 ; VI-NEXT: ; %bb.4: ; %if ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 @@ -1421,10 +1670,10 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; VI-NEXT: s_mov_b32 s4, s2 ; VI-NEXT: s_mov_b32 s5, s3 ; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0 -; VI-NEXT: s_branch .LBB11_6 -; VI-NEXT: .LBB11_5: +; VI-NEXT: s_branch .LBB13_6 +; VI-NEXT: .LBB13_5: ; VI-NEXT: v_mov_b32_e32 v0, s6 -; VI-NEXT: .LBB11_6: ; %endif +; VI-NEXT: .LBB13_6: ; %endif ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 @@ -1437,18 +1686,18 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_cmp_lg_u32 s2, 0 -; GFX9-NEXT: s_cbranch_scc0 .LBB11_2 +; GFX9-NEXT: s_cbranch_scc0 .LBB13_2 ; GFX9-NEXT: ; %bb.1: ; %else ; GFX9-NEXT: s_mul_i32 s6, s2, s3 ; GFX9-NEXT: s_mov_b64 s[4:5], 0 -; GFX9-NEXT: s_branch .LBB11_3 -; GFX9-NEXT: .LBB11_2: +; GFX9-NEXT: s_branch .LBB13_3 +; GFX9-NEXT: .LBB13_2: ; GFX9-NEXT: s_mov_b64 s[4:5], -1 ; GFX9-NEXT: ; implicit-def: $sgpr6 -; GFX9-NEXT: .LBB11_3: ; %Flow +; GFX9-NEXT: .LBB13_3: ; %Flow ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_andn2_b64 vcc, exec, s[4:5] -; GFX9-NEXT: s_cbranch_vccnz .LBB11_5 +; GFX9-NEXT: s_cbranch_vccnz .LBB13_5 ; GFX9-NEXT: ; %bb.4: ; %if ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 @@ 
-1456,10 +1705,10 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX9-NEXT: s_mov_b32 s4, s2 ; GFX9-NEXT: s_mov_b32 s5, s3 ; GFX9-NEXT: buffer_load_dword v0, off, s[4:7], 0 -; GFX9-NEXT: s_branch .LBB11_6 -; GFX9-NEXT: .LBB11_5: +; GFX9-NEXT: s_branch .LBB13_6 +; GFX9-NEXT: .LBB13_5: ; GFX9-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NEXT: .LBB11_6: ; %endif +; GFX9-NEXT: .LBB13_6: ; %endif ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 @@ -1473,17 +1722,17 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_cmp_lg_u32 s2, 0 -; GFX10-NEXT: s_cbranch_scc0 .LBB11_2 +; GFX10-NEXT: s_cbranch_scc0 .LBB13_2 ; GFX10-NEXT: ; %bb.1: ; %else ; GFX10-NEXT: s_mul_i32 s5, s2, s3 -; GFX10-NEXT: s_branch .LBB11_3 -; GFX10-NEXT: .LBB11_2: +; GFX10-NEXT: s_branch .LBB13_3 +; GFX10-NEXT: .LBB13_2: ; GFX10-NEXT: s_mov_b32 s4, -1 ; GFX10-NEXT: ; implicit-def: $sgpr5 -; GFX10-NEXT: .LBB11_3: ; %Flow +; GFX10-NEXT: .LBB13_3: ; %Flow ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX10-NEXT: s_andn2_b32 vcc_lo, exec_lo, s4 -; GFX10-NEXT: s_cbranch_vccnz .LBB11_5 +; GFX10-NEXT: s_cbranch_vccnz .LBB13_5 ; GFX10-NEXT: ; %bb.4: ; %if ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; GFX10-NEXT: s_mov_b32 s6, -1 @@ -1491,10 +1740,10 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX10-NEXT: s_mov_b32 s4, s2 ; GFX10-NEXT: s_mov_b32 s5, s3 ; GFX10-NEXT: buffer_load_dword v0, off, s[4:7], 0 -; GFX10-NEXT: s_branch .LBB11_6 -; GFX10-NEXT: .LBB11_5: +; GFX10-NEXT: s_branch .LBB13_6 +; GFX10-NEXT: .LBB13_5: ; GFX10-NEXT: v_mov_b32_e32 v0, s5 -; GFX10-NEXT: .LBB11_6: ; %endif +; GFX10-NEXT: .LBB13_6: ; %endif ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; GFX10-NEXT: s_mov_b32 s2, -1 @@ -1508,17 +1757,17 @@ define amdgpu_kernel void 
@mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_cmp_lg_u32 s2, 0 -; GFX11-NEXT: s_cbranch_scc0 .LBB11_2 +; GFX11-NEXT: s_cbranch_scc0 .LBB13_2 ; GFX11-NEXT: ; %bb.1: ; %else ; GFX11-NEXT: s_mul_i32 s5, s2, s3 -; GFX11-NEXT: s_branch .LBB11_3 -; GFX11-NEXT: .LBB11_2: +; GFX11-NEXT: s_branch .LBB13_3 +; GFX11-NEXT: .LBB13_2: ; GFX11-NEXT: s_mov_b32 s4, -1 ; GFX11-NEXT: ; implicit-def: $sgpr5 -; GFX11-NEXT: .LBB11_3: ; %Flow +; GFX11-NEXT: .LBB13_3: ; %Flow ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-NEXT: s_cbranch_vccnz .LBB11_5 +; GFX11-NEXT: s_cbranch_vccnz .LBB13_5 ; GFX11-NEXT: ; %bb.4: ; %if ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 @@ -1526,10 +1775,10 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX11-NEXT: s_mov_b32 s4, s2 ; GFX11-NEXT: s_mov_b32 s5, s3 ; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0 -; GFX11-NEXT: s_branch .LBB11_6 -; GFX11-NEXT: .LBB11_5: +; GFX11-NEXT: s_branch .LBB13_6 +; GFX11-NEXT: .LBB13_5: ; GFX11-NEXT: v_mov_b32_e32 v0, s5 -; GFX11-NEXT: .LBB11_6: ; %endif +; GFX11-NEXT: .LBB13_6: ; %endif ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 @@ -1601,7 +1850,7 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_cmp_ne_u64_e64 s[10:11], s[4:5], 0 ; SI-NEXT: s_and_b64 vcc, exec, s[10:11] -; SI-NEXT: s_cbranch_vccz .LBB12_4 +; SI-NEXT: s_cbranch_vccz .LBB14_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: v_mul_hi_u32 v0, s4, v0 @@ -1612,22 +1861,22 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; SI-NEXT: v_add_i32_e32 v1, vcc, s5, v0 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; SI-NEXT: s_cbranch_vccnz 
.LBB12_3 -; SI-NEXT: .LBB12_2: ; %if +; SI-NEXT: s_cbranch_vccnz .LBB14_3 +; SI-NEXT: .LBB14_2: ; %if ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_mov_b32 s4, s2 ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 -; SI-NEXT: .LBB12_3: ; %endif +; SI-NEXT: .LBB14_3: ; %endif ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm -; SI-NEXT: .LBB12_4: +; SI-NEXT: .LBB14_4: ; SI-NEXT: ; implicit-def: $vgpr0_vgpr1 -; SI-NEXT: s_branch .LBB12_2 +; SI-NEXT: s_branch .LBB14_2 ; ; VI-LABEL: mul64_in_branch: ; VI: ; %bb.0: ; %entry @@ -1635,7 +1884,7 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; VI-NEXT: s_mov_b64 s[8:9], 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_cmp_lg_u64 s[4:5], 0 -; VI-NEXT: s_cbranch_scc0 .LBB12_4 +; VI-NEXT: s_cbranch_scc0 .LBB14_4 ; VI-NEXT: ; %bb.1: ; %else ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mad_u64_u32 v[0:1], s[10:11], s4, v0, 0 @@ -1644,22 +1893,22 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; VI-NEXT: s_mul_i32 s4, s5, s6 ; VI-NEXT: v_add_u32_e32 v1, vcc, s4, v1 ; VI-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; VI-NEXT: s_cbranch_vccnz .LBB12_3 -; VI-NEXT: .LBB12_2: ; %if +; VI-NEXT: s_cbranch_vccnz .LBB14_3 +; VI-NEXT: .LBB14_2: ; %if ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_mov_b32 s4, s2 ; VI-NEXT: s_mov_b32 s5, s3 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 -; VI-NEXT: .LBB12_3: ; %endif +; VI-NEXT: .LBB14_3: ; %endif ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm -; VI-NEXT: .LBB12_4: +; VI-NEXT: .LBB14_4: ; VI-NEXT: ; implicit-def: $vgpr0_vgpr1 -; VI-NEXT: s_branch .LBB12_2 +; VI-NEXT: s_branch .LBB14_2 ; ; GFX9-LABEL: 
mul64_in_branch: ; GFX9: ; %bb.0: ; %entry @@ -1667,7 +1916,7 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX9-NEXT: s_mov_b64 s[8:9], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX9-NEXT: s_cbranch_scc0 .LBB12_3 +; GFX9-NEXT: s_cbranch_scc0 .LBB14_3 ; GFX9-NEXT: ; %bb.1: ; %else ; GFX9-NEXT: s_mul_i32 s7, s4, s7 ; GFX9-NEXT: s_mul_hi_u32 s10, s4, s6 @@ -1676,21 +1925,21 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX9-NEXT: s_add_i32 s5, s7, s5 ; GFX9-NEXT: s_mul_i32 s4, s4, s6 ; GFX9-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; GFX9-NEXT: s_cbranch_vccnz .LBB12_4 -; GFX9-NEXT: .LBB12_2: ; %if +; GFX9-NEXT: s_cbranch_vccnz .LBB14_4 +; GFX9-NEXT: .LBB14_2: ; %if ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_mov_b32 s4, s2 ; GFX9-NEXT: s_mov_b32 s5, s3 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 -; GFX9-NEXT: s_branch .LBB12_5 -; GFX9-NEXT: .LBB12_3: +; GFX9-NEXT: s_branch .LBB14_5 +; GFX9-NEXT: .LBB14_3: ; GFX9-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GFX9-NEXT: s_branch .LBB12_2 -; GFX9-NEXT: .LBB12_4: +; GFX9-NEXT: s_branch .LBB14_2 +; GFX9-NEXT: .LBB14_4: ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-NEXT: .LBB12_5: ; %endif +; GFX9-NEXT: .LBB14_5: ; %endif ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -1702,7 +1951,7 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX10-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX10-NEXT: s_cbranch_scc0 .LBB12_3 +; GFX10-NEXT: s_cbranch_scc0 .LBB14_3 ; GFX10-NEXT: ; %bb.1: ; %else ; GFX10-NEXT: s_mul_i32 s7, s4, s7 ; GFX10-NEXT: s_mul_hi_u32 s8, s4, s6 @@ -1711,22 +1960,22 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX10-NEXT: s_mul_i32 s4, 
s4, s6 ; GFX10-NEXT: s_add_i32 s5, s7, s5 ; GFX10-NEXT: s_mov_b32 s6, 0 -; GFX10-NEXT: s_cbranch_execnz .LBB12_4 -; GFX10-NEXT: .LBB12_2: ; %if +; GFX10-NEXT: s_cbranch_execnz .LBB14_4 +; GFX10-NEXT: .LBB14_2: ; %if ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; GFX10-NEXT: s_mov_b32 s6, -1 ; GFX10-NEXT: s_mov_b32 s4, s2 ; GFX10-NEXT: s_mov_b32 s5, s3 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 -; GFX10-NEXT: s_branch .LBB12_5 -; GFX10-NEXT: .LBB12_3: +; GFX10-NEXT: s_branch .LBB14_5 +; GFX10-NEXT: .LBB14_3: ; GFX10-NEXT: s_mov_b32 s6, -1 ; GFX10-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GFX10-NEXT: s_branch .LBB12_2 -; GFX10-NEXT: .LBB12_4: +; GFX10-NEXT: s_branch .LBB14_2 +; GFX10-NEXT: .LBB14_4: ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-NEXT: .LBB12_5: ; %endif +; GFX10-NEXT: .LBB14_5: ; %endif ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; GFX10-NEXT: s_mov_b32 s2, -1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1738,7 +1987,7 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX11-NEXT: s_cbranch_scc0 .LBB12_3 +; GFX11-NEXT: s_cbranch_scc0 .LBB14_3 ; GFX11-NEXT: ; %bb.1: ; %else ; GFX11-NEXT: s_mul_i32 s7, s4, s7 ; GFX11-NEXT: s_mul_hi_u32 s8, s4, s6 @@ -1747,21 +1996,21 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX11-NEXT: s_mul_i32 s4, s4, s6 ; GFX11-NEXT: s_add_i32 s5, s7, s5 ; GFX11-NEXT: s_mov_b32 s6, 0 -; GFX11-NEXT: s_cbranch_execnz .LBB12_4 -; GFX11-NEXT: .LBB12_2: ; %if +; GFX11-NEXT: s_cbranch_execnz .LBB14_4 +; GFX11-NEXT: .LBB14_2: ; %if ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_mov_b32 s4, s2 ; GFX11-NEXT: s_mov_b32 s5, s3 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[4:7], 0 -; GFX11-NEXT: s_branch .LBB12_5 -; GFX11-NEXT: .LBB12_3: +; GFX11-NEXT: s_branch .LBB14_5 +; GFX11-NEXT: 
.LBB14_3: ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GFX11-NEXT: s_branch .LBB12_2 -; GFX11-NEXT: .LBB12_4: +; GFX11-NEXT: s_branch .LBB14_2 +; GFX11-NEXT: .LBB14_4: ; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 -; GFX11-NEXT: .LBB12_5: ; %endif +; GFX11-NEXT: .LBB14_5: ; %endif ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_waitcnt vmcnt(0) From 97217d188469c78d69b65059cabc123e847a2c66 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Sat, 14 Oct 2023 17:27:37 -0400 Subject: [PATCH 224/720] [mlir] Fix '-Wunused' warning. NFC --- mlir/lib/Target/LLVMIR/ModuleImport.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index d070e42ac0c7d..e3562049cd81c 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -991,7 +991,7 @@ FailureOr ModuleImport::convertConstant(llvm::Constant *constant) { } // Convert none token constants. 
- if (auto *noneToken = dyn_cast(constant)) { + if (isa(constant)) { return builder.create(loc).getResult(); } From e9c101a7533a829f48678589c7382d4c21c2eb1b Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 16 Oct 2023 17:08:12 +0200 Subject: [PATCH 225/720] [libc++] Add missing <__availability> include --- libcxx/include/sstream | 1 + 1 file changed, 1 insertion(+) diff --git a/libcxx/include/sstream b/libcxx/include/sstream index 47c2d0553a57c..7db5409871873 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -267,6 +267,7 @@ typedef basic_stringstream wstringstream; */ #include <__assert> // all public C++ headers provide the assertion handler +#include <__availability> #include <__config> #include <__fwd/sstream.h> #include <__utility/swap.h> From 903faefc14eb838a20c0526a14d44dbb0fcea85b Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Mon, 16 Oct 2023 15:14:48 +0000 Subject: [PATCH 226/720] [Flang][OpenMP] Port three tests to HLFIR flow These are copies of tests from flang/test/Lower/OpenMP/FIR --- .../Lower/OpenMP/firstprivate-commonblock.f90 | 34 ++ flang/test/Lower/OpenMP/unstructured.f90 | 348 ++++++++++++++++++ flang/test/Lower/OpenMP/wsloop.f90 | 75 ++++ 3 files changed, 457 insertions(+) create mode 100644 flang/test/Lower/OpenMP/firstprivate-commonblock.f90 create mode 100644 flang/test/Lower/OpenMP/unstructured.f90 create mode 100644 flang/test/Lower/OpenMP/wsloop.f90 diff --git a/flang/test/Lower/OpenMP/firstprivate-commonblock.f90 b/flang/test/Lower/OpenMP/firstprivate-commonblock.f90 new file mode 100644 index 0000000000000..ff064a74d491a --- /dev/null +++ b/flang/test/Lower/OpenMP/firstprivate-commonblock.f90 @@ -0,0 +1,34 @@ +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +!CHECK: func.func @_QPfirstprivate_common() { +!CHECK: %[[val_0:.*]] = fir.address_of(@c_) : !fir.ref> +!CHECK: %[[val_1:.*]] = fir.convert %[[val_0]] : (!fir.ref>) -> !fir.ref> +!CHECK: %[[val_c0:.*]] = arith.constant 0 : index +!CHECK: %[[val_2:.*]] = fir.coordinate_of %[[val_1]], %[[val_c0]] : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[val_3:.*]] = fir.convert %[[val_2]] : (!fir.ref) -> !fir.ref +!CHECK: %[[VAL_3_DECL:.*]]:2 = hlfir.declare %[[val_3]] {uniq_name = "_QFfirstprivate_commonEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[val_4:.*]] = fir.convert %[[val_0]] : (!fir.ref>) -> !fir.ref> +!CHECK: %[[val_c4:.*]] = arith.constant 4 : index +!CHECK: %[[val_5:.*]] = fir.coordinate_of %[[val_4]], %[[val_c4]] : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[val_6:.*]] = fir.convert %[[val_5]] : (!fir.ref) -> !fir.ref +!CHECK: %[[VAL_6_DECL:.*]]:2 = hlfir.declare %[[val_6]] {uniq_name = "_QFfirstprivate_commonEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: omp.parallel { +!CHECK: %[[val_7:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFfirstprivate_commonEx"} +!CHECK: %[[VAL_7_DECL:.*]]:2 = hlfir.declare %[[val_7]] {uniq_name = "_QFfirstprivate_commonEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[val_8:.*]] = fir.load %[[VAL_3_DECL]]#1 : !fir.ref +!CHECK: fir.store %[[val_8]] to %[[VAL_7_DECL]]#1 : !fir.ref +!CHECK: %[[val_9:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFfirstprivate_commonEy"} +!CHECK: %[[VAL_9_DECL:.*]]:2 = hlfir.declare %[[val_9]] {uniq_name = "_QFfirstprivate_commonEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[val_10:.*]] = fir.load %[[VAL_6_DECL]]#1 : !fir.ref +!CHECK: fir.store %[[val_10]] to %[[VAL_9_DECL]]#1 : !fir.ref +!CHECK: omp.terminator +!CHECK: } +!CHECK: return +!CHECK: } + +subroutine firstprivate_common + common /c/ x, y + real x, y + !$omp parallel firstprivate(/c/) + !$omp end parallel +end subroutine 
diff --git a/flang/test/Lower/OpenMP/unstructured.f90 b/flang/test/Lower/OpenMP/unstructured.f90 new file mode 100644 index 0000000000000..e5bf980ce90fd --- /dev/null +++ b/flang/test/Lower/OpenMP/unstructured.f90 @@ -0,0 +1,348 @@ +! Test unstructured code adjacent to and inside OpenMP constructs. + +! RUN: bbc %s -fopenmp -emit-hlfir -o "-" | FileCheck %s + +! CHECK-LABEL: func @_QPss1{{.*}} { +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb4 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb5 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: cond_br %{{[0-9]*}}, ^bb3, ^bb4 +! CHECK: ^bb4: // pred: ^bb2 +! CHECK: fir.call @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb5: // 2 preds: ^bb1, ^bb3 +! CHECK: omp.master { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: omp.terminator +! CHECK: } +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: } +subroutine ss1(n) ! unstructured code followed by a structured OpenMP construct + do i = 1, 3 + if (i .eq. n) exit + print*, 'ss1-A', i + enddo + !$omp master + print*, 'ss1-B', i + !$omp end master + print* +end + +! CHECK-LABEL: func @_QPss2{{.*}} { +! CHECK: omp.master { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb4 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb5 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: cond_br %{{[0-9]*}}, ^bb3, ^bb4 +! CHECK: ^bb3: // pred: ^bb2 +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb5: // 2 preds: ^bb1, ^bb3 +! CHECK: omp.terminator +! CHECK: } +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: } +subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct + !$omp master + print*, 'ss2-A', n + do i = 1, 3 + if (i .eq. n) exit + print*, 'ss2-B', i + enddo + !$omp end master + print*, 'ss2-C', i + print* +end + +! CHECK-LABEL: func @_QPss3{{.*}} { +! CHECK: omp.parallel { +! 
CHECK: %[[ALLOCA_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned} +! CHECK: %[[K_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_K]] {uniq_name = "_QFss3Ek"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {{{.*}}, pinned} +! CHECK: %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {uniq_name = "_QFss3Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_2:.*]] = fir.alloca i32 {{{.*}}, pinned} +! CHECK: %[[OMP_LOOP_K_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {uniq_name = "_QFss3Ek"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { +! CHECK: fir.store %[[ARG1]] to %[[OMP_LOOP_K_DECL]]#1 : !fir.ref +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: %[[LOAD_1:.*]] = fir.load %[[OMP_LOOP_K_DECL]]#0 : !fir.ref +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { +! CHECK: fir.store %[[ARG2]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref +! CHECK: br ^bb1 +! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 +! CHECK: cond_br %{{[0-9]*}}, ^bb3, ^bb6 +! CHECK: ^bb3: // pred: ^bb2 +! CHECK: cond_br %{{[0-9]*}}, ^bb4, ^bb5 +! CHECK: ^bb4: // pred: ^bb3 +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: %[[LOAD_2:.*]] = fir.load %[[K_DECL]]#0 : !fir.ref +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_2]]) +! CHECK: br ^bb2 +! CHECK: ^bb6: // 2 preds: ^bb2, ^bb4 +! CHECK: omp.yield +! CHECK: } +! CHECK: br ^bb1 +! CHECK: ^bb4: // pred: ^bb1 +! CHECK: omp.terminator +! CHECK: } +! CHECK: } +subroutine ss3(n) ! nested unstructured OpenMP constructs + !$omp parallel + do i = 1, 3 + !$omp do + do k = 1, 3 + print*, 'ss3-A', k + enddo + !$omp end do + !$omp do + do j = 1, 3 + do k = 1, 3 + if (k .eq. 
n) exit + print*, 'ss3-B', k + enddo + enddo + !$omp end do + enddo + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss4{{.*}} { +! CHECK: omp.parallel { +! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} +! CHECK: %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFss4Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: fir.store %[[ARG]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref +! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} +! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} +! CHECK: fir.if %[[COND_XOR]] { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: %[[LOAD:.*]] = fir.load %[[OMP_LOOP_J_DECL]]#0 : !fir.ref +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]]) +! CHECK: } else { +! CHECK: } +! CHECK-NEXT: omp.yield +! CHECK-NEXT: } +! CHECK: omp.terminator +! CHECK-NEXT:} +subroutine ss4(n) ! CYCLE in OpenMP wsloop constructs + !$omp parallel + do i = 1, 3 + !$omp do + do j = 1, 3 + if (j .eq. n) cycle + print*, 'ss4', j + enddo + !$omp end do + enddo + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss5() { +! CHECK: omp.parallel { +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! CHECK: ^[[BB1]]: +! CHECK: br ^[[BB2:.*]] +! CHECK: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]] +! CHECK: ^[[BB3]]: +! CHECK: cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB3:.*]] +! CHECK: ^[[BB4]]: +! CHECK: br ^[[BB6]] +! CHECK: ^[[BB3]]: +! CHECK: br ^[[BB2]] +! CHECK: ^[[BB6]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +subroutine ss5() ! EXIT inside OpenMP wsloop (inside parallel) + integer :: x + !$omp parallel private(x) + !$omp do + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + x = j - 222 + enddo + !$omp end do + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss6() { +! CHECK: omp.parallel { +! CHECK: br ^[[BB1_OUTER:.*]] +! CHECK: ^[[BB1_OUTER]]: +! 
CHECK: cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]] +! CHECK: ^[[BB2_OUTER]]: +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! CHECK: ^[[BB1]]: +! CHECK: br ^[[BB2:.*]] +! CHECK: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]] +! CHECK: ^[[BB3]]: +! CHECK: cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]] +! CHECK: ^[[BB4]]: +! CHECK: br ^[[BB6]] +! CHECK: ^[[BB5]] +! CHECK: br ^[[BB2]] +! CHECK: ^[[BB6]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: br ^[[BB1_OUTER]] +! CHECK: ^[[BB3_OUTER]]: +! CHECK: omp.terminator +! CHECK: } +subroutine ss6() ! EXIT inside OpenMP wsloop in a do loop (inside parallel) + integer :: x + !$omp parallel private(x) + do i = 1, 3 + !$omp do + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + x = j - 222 + enddo + !$omp end do + enddo + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss7() { +! CHECK: br ^[[BB1_OUTER:.*]] +! CHECK: ^[[BB1_OUTER]]: +! CHECK: cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]] +! CHECK-NEXT: ^[[BB2_OUTER:.*]]: +! CHECK: omp.parallel { +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! CHECK-NEXT: ^[[BB1]]: +! CHECK: br ^[[BB2:.*]] +! CHECK-NEXT: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]] +! CHECK-NEXT: ^[[BB3]]: +! CHECK: cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]] +! CHECK-NEXT: ^[[BB4]]: +! CHECK: br ^[[BB6]] +! CHECK-NEXT: ^[[BB5]]: +! CHECK: br ^[[BB2]] +! CHECK-NEXT: ^[[BB6]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: br ^[[BB1_OUTER]] +! CHECK-NEXT: ^[[BB3_OUTER]]: +! CHECK-NEXT: return +subroutine ss7() ! EXIT inside OpenMP parallel do (inside do loop) + integer :: x + do i = 1, 3 + !$omp parallel do private(x) + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + enddo + !$omp end parallel do + enddo +end + +! CHECK-LABEL: func @_QPss8() { +! CHECK: omp.parallel { +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! 
CHECK-NEXT: ^[[BB1]]: +! CHECK: br ^[[BB2:.*]] +! CHECK: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]] +! CHECK: ^[[BB3]]: +! CHECK: cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]] +! CHECK: ^[[BB4]]: +! CHECK-NEXT: br ^[[BB6]] +! CHECK: ^[[BB5]]: +! CHECK: br ^[[BB2]] +! CHECK-NEXT: ^[[BB6]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +subroutine ss8() ! EXIT inside OpenMP parallel do + integer :: x + !$omp parallel do private(x) + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + enddo + !$omp end parallel do +end + +! CHECK-LABEL: func @_QPss9() { +! CHECK: omp.parallel { +! CHECK-NEXT: omp.parallel { +! CHECK: br ^[[BB1:.*]] +! CHECK: ^[[BB1]]: +! CHECK: cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB5:.*]] +! CHECK-NEXT: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB4:.*]] +! CHECK-NEXT: ^[[BB3]]: +! CHECK-NEXT: br ^[[BB5]] +! CHECK-NEXT: ^[[BB4]]: +! CHECK: br ^[[BB1]] +! CHECK-NEXT: ^[[BB5]]: +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK: omp.terminator +! CHECK-NEXT } +! CHECK: } +subroutine ss9() ! EXIT inside OpenMP parallel (inside parallel) + integer :: x + !$omp parallel + !$omp parallel private(x) + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + end do + !$omp end parallel + !$omp end parallel +end + +! CHECK-LABEL: func @_QQmain +program p + call ss1(2) + call ss2(2) + call ss3(2) + call ss4(2) + call ss5() + call ss6() + call ss7() + call ss8() + call ss9() +end diff --git a/flang/test/Lower/OpenMP/wsloop.f90 b/flang/test/Lower/OpenMP/wsloop.f90 new file mode 100644 index 0000000000000..4068f715c3e18 --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop.f90 @@ -0,0 +1,75 @@ +! This test checks lowering of OpenMP DO Directive (Worksharing). + +! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s + +!CHECK-LABEL: func @_QPsimple_loop() +subroutine simple_loop + integer :: i + ! CHECK: omp.parallel + !$OMP PARALLEL + ! 
CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} + ! CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP DO + do i=1, 9 + ! CHECK: fir.store %[[I]] to %[[IV_DECL:.*]]#1 : !fir.ref + ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + !$OMP END DO + ! CHECK: omp.terminator + !$OMP END PARALLEL +end subroutine + +!CHECK-LABEL: func @_QPsimple_loop_with_step() +subroutine simple_loop_with_step + integer :: i + ! CHECK: omp.parallel + !$OMP PARALLEL + ! CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} + ! CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loop_with_stepEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32 + ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref + ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref + !$OMP DO + do i=1, 9, 2 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + !$OMP END DO + ! CHECK: omp.terminator + !$OMP END PARALLEL +end subroutine + +!CHECK-LABEL: func @_QPloop_with_schedule_nowait() +subroutine loop_with_schedule_nowait + integer :: i + ! CHECK: omp.parallel + !$OMP PARALLEL + ! CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} + ! 
CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFloop_with_schedule_nowaitEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP DO SCHEDULE(runtime) + do i=1, 9 + ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref + ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + !$OMP END DO NOWAIT + ! CHECK: omp.terminator + !$OMP END PARALLEL +end subroutine From 144c5b6d58803a2d4a0fe92a0fe331ff0347dc3b Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 16 Oct 2023 15:25:44 +0000 Subject: [PATCH 227/720] [compiler-rt][hwasan] Disable deep-recursion.c test on AArch64 Linux The test program occasionaly fails to detect the fault as it should. See https://github.com/llvm/llvm-project/issues/69221. --- compiler-rt/test/hwasan/TestCases/deep-recursion.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler-rt/test/hwasan/TestCases/deep-recursion.c b/compiler-rt/test/hwasan/TestCases/deep-recursion.c index fde8a0db5ad15..39902d072a0d3 100644 --- a/compiler-rt/test/hwasan/TestCases/deep-recursion.c +++ b/compiler-rt/test/hwasan/TestCases/deep-recursion.c @@ -17,6 +17,9 @@ // Stack histories are currently not recorded on x86. // XFAIL: target=x86_64{{.*}} +// Flaky on AArch64 Linux, see https://github.com/llvm/llvm-project/issues/69221. +// UNSUPPORTED: target=aarch64-linux{{.*}} + #include // At least -O1 is needed for this function to not have a stack frame on // AArch64. 
From 6ade5183232dc1398205d7c9dbe21243b2560837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 16 Oct 2023 08:52:02 -0700 Subject: [PATCH 228/720] [flang][openacc][NFC] Issue better error message when directive is wrong (#69034) --- flang/lib/Parser/openacc-parsers.cpp | 32 ++++++++++++---------- flang/test/Semantics/OpenACC/acc-error.f90 | 15 ++++++++++ 2 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 flang/test/Semantics/OpenACC/acc-error.f90 diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp index 09b30e679de0e..75aeffd29f92f 100644 --- a/flang/lib/Parser/openacc-parsers.cpp +++ b/flang/lib/Parser/openacc-parsers.cpp @@ -150,11 +150,12 @@ TYPE_PARSER(sourced(construct( TYPE_PARSER(construct( sourced(Parser{}), Parser{})) -TYPE_PARSER(construct(startAccLine >> "END LOOP"_tok)) +TYPE_PARSER(construct("END LOOP"_tok)) TYPE_PARSER(construct( sourced(Parser{} / endAccLine), - maybe(Parser{}), maybe(Parser{} / endAccLine))) + maybe(Parser{}), + maybe(startAccLine >> Parser{} / endAccLine))) // 2.15.1 Routine directive TYPE_PARSER(sourced(construct(verbatim("ROUTINE"_tok), @@ -227,22 +228,25 @@ TYPE_PARSER(construct( TYPE_PARSER(construct( sourced(Parser{}), Parser{})) -TYPE_PARSER( - startAccLine >> first(sourced(construct( - Parser{})), - sourced(construct( - Parser{})))) +TYPE_PARSER(startAccLine >> + withMessage("expected OpenACC directive"_err_en_US, + first(sourced(construct( + Parser{})), + sourced(construct( + Parser{}))))) // OpenACC constructs TYPE_CONTEXT_PARSER("OpenACC construct"_en_US, startAccLine >> - first(construct(Parser{}), - construct(Parser{}), - construct(Parser{}), - construct(Parser{}), - construct(Parser{}), - construct(Parser{}), - construct(Parser{}))) + withMessage("expected OpenACC directive"_err_en_US, + first(construct(Parser{}), 
+ construct(Parser{}), + construct(Parser{}), + construct( + Parser{}), + construct(Parser{}), + construct(Parser{}), + construct(Parser{})))) TYPE_PARSER(startAccLine >> sourced(construct(sourced("END"_tok >> diff --git a/flang/test/Semantics/OpenACC/acc-error.f90 b/flang/test/Semantics/OpenACC/acc-error.f90 new file mode 100644 index 0000000000000..b1c3b77847429 --- /dev/null +++ b/flang/test/Semantics/OpenACC/acc-error.f90 @@ -0,0 +1,15 @@ +! RUN: %python %S/../test_errors.py %s %flang -fopenacc + +! Check parser specific error for OpenACC + + +subroutine test(a, n) + integer :: a(n) + !ERROR: expected OpenACC directive + !$acc p + integer :: i,j + + i = 0 + !ERROR: expected OpenACC directive + !$acc p + end subroutine From b51eaebd2b437ff4fdb8b2e80131a665da80a290 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 16 Oct 2023 09:18:40 -0700 Subject: [PATCH 229/720] [libc++] Remove workaround for clang-tidy 16 in the test suite setup code (#69035) We have moved to clang-tidy 17 now, so the workaround shouldn't be necessary. --- libcxx/utils/libcxx/test/params.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index 456794b9b1cce..c3732560f5e46 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -77,9 +77,6 @@ def getStdFlag(cfg, std): - # TODO(LLVM-17) Remove this clang-tidy-16 work-around - if std == "c++23": - std = "c++2b" if hasCompileFlag(cfg, "-std=" + std): return "-std=" + std # TODO(LLVM-19) Remove the fallbacks needed for Clang 16. 
From 45d151138008c4880c8f9b77ffc43c23e0a9f1cb Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Mon, 16 Oct 2023 12:24:05 -0400 Subject: [PATCH 230/720] [libunwind][AIX] Fix problem with stepping up from a leaf function when unwinding started in a signal handler Summary: The implementation of AIX unwinder gets the return address from the link area of the stack frame of a function and uses the return address to walk up functions. However, when unwinding starts from a signal handler and the function that raised the signal happens to be a leaf function and it does not have its own stack frame, the return address of the stack frame of the leaf function points to the caller of the function that calls the leaf function because the leaf function and its caller share the same stack frame. As a result, the caller of the leaf function is skipped. This patch fixes the problem by saving the LR value in sigcontext when the unwinder hits the signal handler trampoline frame and using it as the return address of the leaf function. The LR value from sigcontext is saved in the unwinding context slot for LR currently unused. 
Reviewed by: stephenpeckham Differential Revision: https://reviews.llvm.org/D158655 --- libunwind/src/Registers.hpp | 4 + libunwind/src/UnwindCursor.hpp | 96 +++++--- libunwind/src/UnwindRegistersSave.S | 14 +- libunwind/test/aix_signal_unwind.pass.sh.S | 245 +++++++++++++++++++++ 4 files changed, 325 insertions(+), 34 deletions(-) create mode 100644 libunwind/test/aix_signal_unwind.pass.sh.S diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index fb6e04e50fa1c..d11ddb3426d52 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -619,6 +619,8 @@ class _LIBUNWIND_HIDDEN Registers_ppc { void setIP(uint32_t value) { _registers.__srr0 = value; } uint64_t getCR() const { return _registers.__cr; } void setCR(uint32_t value) { _registers.__cr = value; } + uint64_t getLR() const { return _registers.__lr; } + void setLR(uint32_t value) { _registers.__lr = value; } private: struct ppc_thread_state_t { @@ -1189,6 +1191,8 @@ class _LIBUNWIND_HIDDEN Registers_ppc64 { void setIP(uint64_t value) { _registers.__srr0 = value; } uint64_t getCR() const { return _registers.__cr; } void setCR(uint64_t value) { _registers.__cr = value; } + uint64_t getLR() const { return _registers.__lr; } + void setLR(uint64_t value) { _registers.__lr = value; } private: struct ppc64_thread_state_t { diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index dde94773bc341..f89c5b2c2f73e 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -2301,27 +2301,39 @@ int UnwindCursor::stepWithTBTable(pint_t pc, tbtable *TBTable, if (!getFunctionName(functionBuf, sizeof(functionBuf), &offset)) { functionName = ".anonymous."; } - _LIBUNWIND_TRACE_UNWINDING("%s: Look up traceback table of func=%s at %p", - __func__, functionName, - reinterpret_cast(TBTable)); + _LIBUNWIND_TRACE_UNWINDING( + "%s: Look up traceback table of func=%s at %p, pc=%p, " + "SP=%p, saves_lr=%d, stores_bc=%d", + __func__, functionName, 
reinterpret_cast(TBTable), + reinterpret_cast(pc), + reinterpret_cast(registers.getSP()), TBTable->tb.saves_lr, + TBTable->tb.stores_bc); } #if defined(__powerpc64__) - // Instruction to reload TOC register "l r2,40(r1)" + // Instruction to reload TOC register "ld r2,40(r1)" const uint32_t loadTOCRegInst = 0xe8410028; const int32_t unwPPCF0Index = UNW_PPC64_F0; const int32_t unwPPCV0Index = UNW_PPC64_V0; #else - // Instruction to reload TOC register "l r2,20(r1)" + // Instruction to reload TOC register "lwz r2,20(r1)" const uint32_t loadTOCRegInst = 0x80410014; const int32_t unwPPCF0Index = UNW_PPC_F0; const int32_t unwPPCV0Index = UNW_PPC_V0; #endif + // lastStack points to the stack frame of the next routine up. + pint_t curStack = static_cast(registers.getSP()); + pint_t lastStack = *reinterpret_cast(curStack); + + if (lastStack == 0) + return UNW_STEP_END; + R newRegisters = registers; - // lastStack points to the stack frame of the next routine up. - pint_t lastStack = *(reinterpret_cast(registers.getSP())); + // If backchain is not stored, use the current stack frame. + if (!TBTable->tb.stores_bc) + lastStack = curStack; // Return address is the address after call site instruction. pint_t returnAddress; @@ -2331,33 +2343,41 @@ int UnwindCursor::stepWithTBTable(pint_t pc, tbtable *TBTable, reinterpret_cast(lastStack)); sigcontext *sigContext = reinterpret_cast( - reinterpret_cast(lastStack) + STKMIN); + reinterpret_cast(lastStack) + STKMINALIGN); returnAddress = sigContext->sc_jmpbuf.jmp_context.iar; - _LIBUNWIND_TRACE_UNWINDING("From sigContext=%p, returnAddress=%p\n", - reinterpret_cast(sigContext), - reinterpret_cast(returnAddress)); - + bool useSTKMIN = false; if (returnAddress < 0x10000000) { - // Try again using STKMINALIGN + // Try again using STKMIN. 
sigContext = reinterpret_cast( - reinterpret_cast(lastStack) + STKMINALIGN); + reinterpret_cast(lastStack) + STKMIN); returnAddress = sigContext->sc_jmpbuf.jmp_context.iar; if (returnAddress < 0x10000000) { - _LIBUNWIND_TRACE_UNWINDING("Bad returnAddress=%p\n", - reinterpret_cast(returnAddress)); + _LIBUNWIND_TRACE_UNWINDING("Bad returnAddress=%p from sigcontext=%p", + reinterpret_cast(returnAddress), + reinterpret_cast(sigContext)); return UNW_EBADFRAME; - } else { - _LIBUNWIND_TRACE_UNWINDING("Tried again using STKMINALIGN: " - "sigContext=%p, returnAddress=%p. " - "Seems to be a valid address\n", - reinterpret_cast(sigContext), - reinterpret_cast(returnAddress)); } + useSTKMIN = true; } + _LIBUNWIND_TRACE_UNWINDING("Returning from a signal handler %s: " + "sigContext=%p, returnAddress=%p. " + "Seems to be a valid address", + useSTKMIN ? "STKMIN" : "STKMINALIGN", + reinterpret_cast(sigContext), + reinterpret_cast(returnAddress)); + // Restore the condition register from sigcontext. newRegisters.setCR(sigContext->sc_jmpbuf.jmp_context.cr); + // Save the LR in sigcontext for stepping up when the function that + // raised the signal is a leaf function. This LR has the return address + // to the caller of the leaf function. + newRegisters.setLR(sigContext->sc_jmpbuf.jmp_context.lr); + _LIBUNWIND_TRACE_UNWINDING( + "Save LR=%p from sigcontext", + reinterpret_cast(sigContext->sc_jmpbuf.jmp_context.lr)); + // Restore GPRs from sigcontext. for (int i = 0; i < 32; ++i) newRegisters.setRegister(i, sigContext->sc_jmpbuf.jmp_context.gpr[i]); @@ -2380,13 +2400,26 @@ int UnwindCursor::stepWithTBTable(pint_t pc, tbtable *TBTable, } } else { // Step up a normal frame. 
- returnAddress = reinterpret_cast(lastStack)[2]; - _LIBUNWIND_TRACE_UNWINDING("Extract info from lastStack=%p, " - "returnAddress=%p\n", - reinterpret_cast(lastStack), - reinterpret_cast(returnAddress)); - _LIBUNWIND_TRACE_UNWINDING("fpr_regs=%d, gpr_regs=%d, saves_cr=%d\n", + if (!TBTable->tb.saves_lr && registers.getLR()) { + // This case should only occur if we were called from a signal handler + // and the signal occurred in a function that doesn't save the LR. + returnAddress = registers.getLR(); + _LIBUNWIND_TRACE_UNWINDING("Use saved LR=%p", + reinterpret_cast(returnAddress)); + } else { + // Otherwise, use the LR value in the stack link area. + returnAddress = reinterpret_cast(lastStack)[2]; + } + + // Reset LR in the current context. + newRegisters.setLR(NULL); + + _LIBUNWIND_TRACE_UNWINDING( + "Extract info from lastStack=%p, returnAddress=%p", + reinterpret_cast(lastStack), + reinterpret_cast(returnAddress)); + _LIBUNWIND_TRACE_UNWINDING("fpr_regs=%d, gpr_regs=%d, saves_cr=%d", TBTable->tb.fpr_saved, TBTable->tb.gpr_saved, TBTable->tb.saves_cr); @@ -2450,7 +2483,7 @@ int UnwindCursor::stepWithTBTable(pint_t pc, tbtable *TBTable, struct vec_ext *vec_ext = reinterpret_cast(charPtr); - _LIBUNWIND_TRACE_UNWINDING("vr_saved=%d\n", vec_ext->vr_saved); + _LIBUNWIND_TRACE_UNWINDING("vr_saved=%d", vec_ext->vr_saved); // Restore vector register(s) if saved on the stack. if (vec_ext->vr_saved) { @@ -2480,11 +2513,11 @@ int UnwindCursor::stepWithTBTable(pint_t pc, tbtable *TBTable, // Do we need to set the TOC register? 
_LIBUNWIND_TRACE_UNWINDING( - "Current gpr2=%p\n", + "Current gpr2=%p", reinterpret_cast(newRegisters.getRegister(2))); if (firstInstruction == loadTOCRegInst) { _LIBUNWIND_TRACE_UNWINDING( - "Set gpr2=%p from frame\n", + "Set gpr2=%p from frame", reinterpret_cast(reinterpret_cast(lastStack)[5])); newRegisters.setRegister(2, reinterpret_cast(lastStack)[5]); } @@ -2516,7 +2549,6 @@ int UnwindCursor::stepWithTBTable(pint_t pc, tbtable *TBTable, } else { isSignalFrame = false; } - return UNW_STEP_SUCCESS; } #endif // defined(_LIBUNWIND_SUPPORT_TBTAB_UNWIND) diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S index 5534d1734b6ba..dc0f7da31ccf8 100644 --- a/libunwind/src/UnwindRegistersSave.S +++ b/libunwind/src/UnwindRegistersSave.S @@ -352,7 +352,12 @@ LnoR2Fix: std 0, PPC64_OFFS_CR(3) mfxer 0 std 0, PPC64_OFFS_XER(3) +#if defined(_AIX) + // LR value saved from the register is not used, initialize it to 0. + li 0, 0 +#else mflr 0 +#endif std 0, PPC64_OFFS_LR(3) mfctr 0 std 0, PPC64_OFFS_CTR(3) @@ -565,8 +570,8 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) // is called from a different module. Save the original TOC register // in the context if this is the case. mflr 4 - lwz 4, 0(4) // Get the instruction at the return address. - xoris 0, 4, 0x8041 // Is it reloading the TOC register "ld 2,40(1)"? + lwz 4, 0(4) // Get the instruction at the return address. + xoris 0, 4, 0x8041 // Is it reloading the TOC register "lwz 2,20(1)"? cmplwi 0, 0x14 bne 0, LnoR2Fix // No need to fix up r2 if it is not. lwz 2, 20(1) // Use the saved TOC register in the stack. @@ -610,6 +615,11 @@ LnoR2Fix: // save CR registers mfcr 0 stw 0, 136(3) +#if defined(_AIX) + // LR value from the register is not used, initialize it to 0. 
+ li 0, 0 + stw 0, 144(3) +#endif // save CTR register mfctr 0 stw 0, 148(3) diff --git a/libunwind/test/aix_signal_unwind.pass.sh.S b/libunwind/test/aix_signal_unwind.pass.sh.S new file mode 100644 index 0000000000000..9ca18e9481f4f --- /dev/null +++ b/libunwind/test/aix_signal_unwind.pass.sh.S @@ -0,0 +1,245 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Test that _Unwind_Backtrace() walks up from a signal handler and produces +// a correct traceback when the function raising the signal does not save +// the link register or does not store the stack back chain. + +// REQUIRES: target=powerpc{{(64)?}}-ibm-aix + +// Test when the function raising the signal does not save the link register +// RUN: %{cxx} -x c++ %s -o %t.exe -DCXX_CODE %{flags} %{compile_flags} +// RUN: %{exec} %t.exe + +// Test when the function raising the signal does not store stack back chain. +// RUN: %{cxx} -x c++ -c %s -o %t1.o -DCXX_CODE -DNOBACKCHAIN %{flags} \ +// RUN: %{compile_flags} +// RUN: %{cxx} -c %s -o %t2.o %{flags} %{compile_flags} +// RUN: %{cxx} -o %t1.exe %t1.o %t2.o %{flags} %{link_flags} +// RUN: %{exec} %t1.exe + +#ifdef CXX_CODE + +#undef NDEBUG +#include +#include +#include +#include +#include +#include +#include + +#define NAME_ARRAY_SIZE 10 +#define NAMES_EXPECTED 6 + +const char* namesExpected[] = {"handler", "abc", "bar", "foo", "main", + "__start"}; +char *namesObtained[NAME_ARRAY_SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +int funcIndex = 0; + +// Get the function name from traceback table. +char *getFuncName(uintptr_t pc, uint16_t *nameLen) { + uint32_t *p = reinterpret_cast(pc); + + // Keep looking forward until a word of 0 is found. 
The traceback + // table starts at the following word. + while (*p) + ++p; + tbtable *TBTable = reinterpret_cast(p + 1); + + if (!TBTable->tb.name_present) + return NULL; + + // Get to the optional portion of the traceback table. + p = reinterpret_cast(&TBTable->tb_ext); + + // Skip field parminfo if it exists. + if (TBTable->tb.fixedparms || TBTable->tb.floatparms) + ++p; + + // Skip field tb_offset if it exists. + if (TBTable->tb.has_tboff) + ++p; + + // Skip field hand_mask if it exists. + if (TBTable->tb.int_hndl) + ++p; + + // Skip fields ctl_info and ctl_info_disp if they exist. + if (TBTable->tb.has_ctl) + p += 1 + *p; + + *nameLen = *reinterpret_cast(p); + return reinterpret_cast(p) + sizeof(uint16_t); +} + +_Unwind_Reason_Code callBack(struct _Unwind_Context *uc, void *arg) { + (void)arg; + uint16_t nameLen; + uintptr_t ip = _Unwind_GetIP(uc); + if (funcIndex < NAME_ARRAY_SIZE) + namesObtained[funcIndex++] = strndup(getFuncName(ip, &nameLen), nameLen); + return _URC_NO_REASON; +} + +extern "C" void handler(int signum) { + (void)signum; + // Walk stack frames for traceback. + _Unwind_Backtrace(callBack, NULL); + + // Verify the traceback. + assert(funcIndex <= NAMES_EXPECTED && "Obtained names more than expected"); + for (int i = 0; i < funcIndex; ++i) { + assert(!strcmp(namesExpected[i], namesObtained[i]) && + "Function names do not match"); + free(namesObtained[i]); + } + exit(0); +} + +#ifdef NOBACKCHAIN +// abc() is in assembly. It raises signal SIGSEGV and does not store +// the stack back chain. +extern "C" void abc(); + +#else +volatile int *null = 0; + +// abc() raises signal SIGSEGV and does not save the link register. +extern "C" __attribute__((noinline)) void abc() { + // Produce a SIGSEGV. + *null = 0; +} +#endif + +extern "C" __attribute__((noinline)) void bar() { + abc(); +} + +extern "C" __attribute__((noinline)) void foo() { + bar(); +} + +int main() { + // Set signal handler for SIGSEGV. 
+ signal(SIGSEGV, handler); + foo(); +} + +#else // Assembly code for abc(). +// This assembly code is similar to the following C code but it saves the +// link register. +// +// int *badp = 0; +// void abc() { +// *badp = 0; +// } + +#ifdef __64BIT__ + .csect [PR],5 + .file "abc.c" + .globl abc[DS] # -- Begin function abc + .globl .abc + .align 4 + .csect abc[DS],3 + .vbyte 8, .abc # @abc + .vbyte 8, TOC[TC0] + .vbyte 8, 0 + .csect [PR],5 +.abc: +# %bb.0: # %entry + mflr 0 + std 0, 16(1) + ld 3, L..C0(2) # @badp + bl $+4 + ld 4, 0(3) + li 3, 0 + stw 3, 0(4) + ld 0, 16(1) + mtlr 0 + blr +L..abc0: + .vbyte 4, 0x00000000 # Traceback table begin + .byte 0x00 # Version = 0 + .byte 0x09 # Language = CPlusPlus + .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue + # +HasTraceBackTableOffset, -IsInternalProcedure + # -HasControlledStorage, -IsTOCless + # -IsFloatingPointPresent + # -IsFloatingPointOperationLogOrAbortEnabled + .byte 0x61 # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed + # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved + .byte 0x00 # -IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0 + .byte 0x01 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1 + .byte 0x00 # NumberOfFixedParms = 0 + .byte 0x01 # NumberOfFPParms = 0, +HasParmsOnStack + .vbyte 4, L..abc0-.abc # Function size + .vbyte 2, 0x0003 # Function name len = 3 + .byte "abc" # Function Name + .byte 0x1f # AllocaUsed + # -- End function + .csect badp[RW],3 + .globl badp[RW] # @badp + .align 3 + .vbyte 8, 0 + .toc +L..C0: + .tc badp[TC],badp[RW] +#else + .csect [PR],5 + .file "abc.c" + .globl abc[DS] # -- Begin function abc + .globl .abc + .align 4 + .csect abc[DS],2 + .vbyte 4, .abc # @abc + .vbyte 4, TOC[TC0] + .vbyte 4, 0 + .csect [PR],5 +.abc: +# %bb.0: # %entry + mflr 0 + stw 0, 8(1) + lwz 3, L..C0(2) # @badp + bl $+4 + lwz 4, 0(3) + li 3, 0 + stw 3, 0(4) + lwz 0, 8(1) + mtlr 0 + blr +L..abc0: + .vbyte 4, 0x00000000 # Traceback table begin + .byte 0x00 # Version = 0 + 
.byte 0x09 # Language = CPlusPlus + .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue + # +HasTraceBackTableOffset, -IsInternalProcedure + # -HasControlledStorage, -IsTOCless + # -IsFloatingPointPresent + # -IsFloatingPointOperationLogOrAbortEnabled + .byte 0x61 # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed + # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved + .byte 0x00 # -IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0 + .byte 0x01 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1 + .byte 0x00 # NumberOfFixedParms = 0 + .byte 0x01 # NumberOfFPParms = 0, +HasParmsOnStack + .vbyte 4, L..abc0-.abc # Function size + .vbyte 2, 0x0003 # Function name len = 3 + .byte "abc" # Function Name + .byte 0x1f # AllocaUsed + # -- End function + .csect badp[RW],2 + .globl badp[RW] # @badp + .align 2 + .vbyte 4, 0 + .toc +L..C0: + .tc badp[TC],badp[RW] +#endif // __64BIT__ +#endif // CXX_CODE From 0b570ad969b8b03b366198b395b7d21c3f8fe40c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 16 Oct 2023 09:31:09 -0700 Subject: [PATCH 231/720] [CodeGen] Remove LiveVariables::{isPHIJoin,setPHIJoin} (#69128) The last use of isPHIJoin was removed by: commit fac770b865f59cbe615241dad153ad20d5138b9e Author: Jakob Stoklund Olesen Date: Sat Feb 9 00:04:07 2013 +0000 so there is no reason to maintain PHIJoins. --- llvm/include/llvm/CodeGen/LiveVariables.h | 11 ----------- llvm/lib/CodeGen/LiveVariables.cpp | 1 - llvm/lib/CodeGen/PHIElimination.cpp | 3 --- 3 files changed, 15 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index 90aeb8ceda559..9ed4c7bdf7b17 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -118,11 +118,6 @@ class LiveVariables : public MachineFunctionPass { /// IndexedMap VirtRegInfo; - /// PHIJoins - list of virtual registers that are PHI joins. 
These registers - /// may have multiple definitions, and they require special handling when - /// building live intervals. - SparseBitVector<> PHIJoins; - private: // Intermediate data structures MachineFunction *MF = nullptr; @@ -302,12 +297,6 @@ class LiveVariables : public MachineFunctionPass { MachineBasicBlock *DomBB, MachineBasicBlock *SuccBB, std::vector> &LiveInSets); - - /// isPHIJoin - Return true if Reg is a phi join register. - bool isPHIJoin(Register Reg) { return PHIJoins.test(Reg.id()); } - - /// setPHIJoin - Mark Reg as a phi join register. - void setPHIJoin(Register Reg) { PHIJoins.set(Reg.id()); } }; } // End llvm namespace diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index 077276b64aa22..6b983b6320c71 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -601,7 +601,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { PhysRegDef.assign(NumRegs, nullptr); PhysRegUse.assign(NumRegs, nullptr); PHIVarInfo.resize(MF->getNumBlockIDs()); - PHIJoins.clear(); // FIXME: LiveIntervals will be updated to remove its dependence on // LiveVariables to improve compilation time and eliminate bizarre pass diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp index dbb9a9ffdf60b..10d8378ce58d1 100644 --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -330,9 +330,6 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (IncomingReg) { LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); - // Increment use count of the newly created virtual register. 
- LV->setPHIJoin(IncomingReg); - MachineInstr *OldKill = nullptr; bool IsPHICopyAfterOldKill = false; From 9cc6f492f20ffc7bd1b7c9e5ef696aa921bcdef9 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 16 Oct 2023 09:33:26 -0700 Subject: [PATCH 232/720] [GlobalISel] Remove unused function narrowToSmallerAndWidenToSmallest (#69130) The last use was removed by: commit b163efae3312abe1227cff1d7704325138b4e538 Author: Simon Pilgrim Date: Thu Jun 15 13:56:53 2023 +0100 --- .../llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h index 08233dba20411..0b167ce9650d0 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h @@ -240,16 +240,6 @@ class LegacyLegalizerInfo { Unsupported); } - static SizeAndActionsVec - narrowToSmallerAndWidenToSmallest(const SizeAndActionsVec &v) { - using namespace LegacyLegalizeActions; - assert(v.size() > 0 && - "At least one size that can be legalized towards is needed" - " for this SizeChangeStrategy"); - return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar, - WidenScalar); - } - /// A SizeChangeStrategy for the common case where legalization for a /// particular vector operation consists of having more elements in the /// vector, to a type that is legal. Unless there is no such type and then From e32cde6f41cd93b7a20b64a1abc8d5c488c8fe51 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 25 May 2023 11:32:38 -0700 Subject: [PATCH 233/720] [clang] Use IgnoreParensSingleStep in more places Addresses a post-commit comment on D146764. 
Reviewed By: hans Differential Revision: https://reviews.llvm.org/D151479 --- clang/lib/Sema/SemaInit.cpp | 37 +++++++------------------------------ 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index fd95b16b84b6e..8f945bc764bef 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -15,6 +15,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" +#include "clang/AST/IgnoreExpr.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceManager.h" @@ -170,22 +171,9 @@ static void updateStringLiteralType(Expr *E, QualType Ty) { while (true) { E->setType(Ty); E->setValueKind(VK_PRValue); - if (isa(E) || isa(E)) { - break; - } else if (ParenExpr *PE = dyn_cast(E)) { - E = PE->getSubExpr(); - } else if (UnaryOperator *UO = dyn_cast(E)) { - assert(UO->getOpcode() == UO_Extension); - E = UO->getSubExpr(); - } else if (GenericSelectionExpr *GSE = dyn_cast(E)) { - E = GSE->getResultExpr(); - } else if (ChooseExpr *CE = dyn_cast(E)) { - E = CE->getChosenSubExpr(); - } else if (PredefinedExpr *PE = dyn_cast(E)) { - E = PE->getFunctionName(); - } else { - llvm_unreachable("unexpected expr in string literal init"); - } + if (isa(E) || isa(E)) + break; + E = IgnoreParensSingleStep(E); } } @@ -194,20 +182,9 @@ static void updateStringLiteralType(Expr *E, QualType Ty) { static void updateGNUCompoundLiteralRValue(Expr *E) { while (true) { E->setValueKind(VK_PRValue); - if (isa(E)) { - break; - } else if (ParenExpr *PE = dyn_cast(E)) { - E = PE->getSubExpr(); - } else if (UnaryOperator *UO = dyn_cast(E)) { - assert(UO->getOpcode() == UO_Extension); - E = UO->getSubExpr(); - } else if (GenericSelectionExpr *GSE = dyn_cast(E)) { - E = GSE->getResultExpr(); - } else if (ChooseExpr *CE = dyn_cast(E)) { - E = CE->getChosenSubExpr(); - } else { - llvm_unreachable("unexpected expr in array compound literal 
init"); - } + if (isa(E)) + break; + E = IgnoreParensSingleStep(E); } } From 468d3b1b78c96991d7c2cd2eec8176bc6a132721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 16 Oct 2023 09:35:50 -0700 Subject: [PATCH 234/720] [flang][openacc][NFC] Simplify lowering of recipe (#68836) Refactor some of the lowering in the reduction and firstprivate recipe to avoid duplicated code. --- flang/lib/Lower/OpenACC.cpp | 175 +++++++++++++++--------------------- 1 file changed, 74 insertions(+), 101 deletions(-) diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 61a1b9fd86717..e09266121cdb9 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -463,7 +463,7 @@ bool isConstantBound(mlir::acc::DataBoundsOp &op) { } /// Return true iff all the bounds are expressed with constant values. -bool areAllBoundConstant(llvm::SmallVector &bounds) { +bool areAllBoundConstant(const llvm::SmallVector &bounds) { for (auto bound : bounds) { auto dataBound = mlir::dyn_cast(bound.getDefiningOp()); @@ -474,27 +474,6 @@ bool areAllBoundConstant(llvm::SmallVector &bounds) { return true; } -static fir::ShapeOp -genShapeFromBounds(mlir::Location loc, fir::FirOpBuilder &builder, - const llvm::SmallVector &args) { - assert(args.size() % 3 == 0 && "Triplets must be a multiple of 3"); - llvm::SmallVector extents; - mlir::Type idxTy = builder.getIndexType(); - mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); - mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); - for (unsigned i = 0; i < args.size(); i += 3) { - mlir::Value s1 = - builder.create(loc, args[i + 1], args[0]); - mlir::Value s2 = builder.create(loc, s1, one); - mlir::Value s3 = builder.create(loc, s2, args[i + 2]); - mlir::Value cmp = builder.create( - loc, mlir::arith::CmpIPredicate::sgt, s3, zero); - mlir::Value ext = 
builder.create(loc, cmp, s3, zero); - extents.push_back(ext); - } - return builder.create(loc, extents); -} - static llvm::SmallVector genConstantBounds(fir::FirOpBuilder &builder, mlir::Location loc, mlir::acc::DataBoundsOp &dataBound) { @@ -520,6 +499,63 @@ genConstantBounds(fir::FirOpBuilder &builder, mlir::Location loc, return {lb, ub, step}; } +static fir::ShapeOp genShapeFromBoundsOrArgs( + mlir::Location loc, fir::FirOpBuilder &builder, fir::SequenceType seqTy, + const llvm::SmallVector &bounds, mlir::ValueRange arguments) { + llvm::SmallVector args; + if (areAllBoundConstant(bounds)) { + for (auto bound : llvm::reverse(bounds)) { + auto dataBound = + mlir::cast(bound.getDefiningOp()); + args.append(genConstantBounds(builder, loc, dataBound)); + } + } else { + assert(((arguments.size() - 2) / 3 == seqTy.getDimension()) && + "Expect 3 block arguments per dimension"); + for (auto arg : arguments.drop_front(2)) + args.push_back(arg); + } + + assert(args.size() % 3 == 0 && "Triplets must be a multiple of 3"); + llvm::SmallVector extents; + mlir::Type idxTy = builder.getIndexType(); + mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); + mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); + for (unsigned i = 0; i < args.size(); i += 3) { + mlir::Value s1 = + builder.create(loc, args[i + 1], args[0]); + mlir::Value s2 = builder.create(loc, s1, one); + mlir::Value s3 = builder.create(loc, s2, args[i + 2]); + mlir::Value cmp = builder.create( + loc, mlir::arith::CmpIPredicate::sgt, s3, zero); + mlir::Value ext = builder.create(loc, cmp, s3, zero); + extents.push_back(ext); + } + return builder.create(loc, extents); +} + +static hlfir::DesignateOp::Subscripts +getSubscriptsFromArgs(mlir::ValueRange args) { + hlfir::DesignateOp::Subscripts triplets; + for (unsigned i = 2; i < args.size(); i += 3) + triplets.emplace_back( + hlfir::DesignateOp::Triplet{args[i], args[i + 1], args[i + 2]}); + return triplets; +} + +static hlfir::Entity 
genDesignateWithTriplets( + fir::FirOpBuilder &builder, mlir::Location loc, hlfir::Entity &entity, + hlfir::DesignateOp::Subscripts &triplets, mlir::Value shape) { + llvm::SmallVector lenParams; + hlfir::genLengthParameters(loc, builder, entity, lenParams); + auto designate = builder.create( + loc, entity.getBase().getType(), entity, /*component=*/"", + /*componentShape=*/mlir::Value{}, triplets, + /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt, shape, + lenParams); + return hlfir::Entity{designate.getResult()}; +} + mlir::acc::FirstprivateRecipeOp Fortran::lower::createOrGetFirstprivateRecipe( mlir::OpBuilder &builder, llvm::StringRef recipeName, mlir::Location loc, mlir::Type ty, llvm::SmallVector &bounds) { @@ -600,47 +636,16 @@ mlir::acc::FirstprivateRecipeOp Fortran::lower::createOrGetFirstprivateRecipe( if (!seqTy) TODO(loc, "Unsupported boxed type in OpenACC firstprivate"); - if (allConstantBound) { - for (auto bound : llvm::reverse(bounds)) { - auto dataBound = - mlir::cast(bound.getDefiningOp()); - tripletArgs.append(genConstantBounds(firBuilder, loc, dataBound)); - } - } else { - assert(((recipe.getCopyRegion().getArguments().size() - 2) / 3 == - seqTy.getDimension()) && - "Expect 3 block arguments per dimension"); - for (auto arg : recipe.getCopyRegion().getArguments().drop_front(2)) - tripletArgs.push_back(arg); - } - auto shape = genShapeFromBounds(loc, firBuilder, tripletArgs); - hlfir::DesignateOp::Subscripts triplets; - for (unsigned i = 2; i < recipe.getCopyRegion().getArguments().size(); - i += 3) - triplets.emplace_back(hlfir::DesignateOp::Triplet{ - recipe.getCopyRegion().getArgument(i), - recipe.getCopyRegion().getArgument(i + 1), - recipe.getCopyRegion().getArgument(i + 2)}); - - llvm::SmallVector lenParamsLeft; + auto shape = genShapeFromBoundsOrArgs( + loc, firBuilder, seqTy, bounds, recipe.getCopyRegion().getArguments()); + hlfir::DesignateOp::Subscripts triplets = + 
getSubscriptsFromArgs(recipe.getCopyRegion().getArguments()); auto leftEntity = hlfir::Entity{recipe.getCopyRegion().getArgument(0)}; - hlfir::genLengthParameters(loc, firBuilder, leftEntity, lenParamsLeft); - auto leftDesignate = firBuilder.create( - loc, leftEntity.getBase().getType(), leftEntity, /*component=*/"", - /*componentShape=*/mlir::Value{}, triplets, - /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt, - shape, lenParamsLeft); - auto left = hlfir::Entity{leftDesignate.getResult()}; - - llvm::SmallVector lenParamsRight; + auto left = + genDesignateWithTriplets(firBuilder, loc, leftEntity, triplets, shape); auto rightEntity = hlfir::Entity{recipe.getCopyRegion().getArgument(1)}; - hlfir::genLengthParameters(loc, firBuilder, rightEntity, lenParamsRight); - auto rightDesignate = firBuilder.create( - loc, rightEntity.getBase().getType(), rightEntity, /*component=*/"", - /*componentShape=*/mlir::Value{}, triplets, - /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt, - shape, lenParamsRight); - auto right = hlfir::Entity{rightDesignate.getResult()}; + auto right = + genDesignateWithTriplets(firBuilder, loc, rightEntity, triplets, shape); firBuilder.create(loc, left, right); } @@ -1110,48 +1115,16 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, if (!seqTy) TODO(loc, "Unsupported boxed type in OpenACC reduction"); - if (allConstantBound) { - for (auto bound : llvm::reverse(bounds)) { - auto dataBound = - mlir::cast(bound.getDefiningOp()); - tripletArgs.append(genConstantBounds(builder, loc, dataBound)); - } - } else { - assert(((recipe.getCombinerRegion().getArguments().size() - 2) / 3 == - seqTy.getDimension()) && - "Expect 3 block arguments per dimension"); - for (auto arg : recipe.getCombinerRegion().getArguments().drop_front(2)) - tripletArgs.push_back(arg); - } - auto shape = genShapeFromBounds(loc, builder, tripletArgs); - - hlfir::DesignateOp::Subscripts triplets; - for (unsigned i = 2; i < 
recipe.getCombinerRegion().getArguments().size(); - i += 3) - triplets.emplace_back(hlfir::DesignateOp::Triplet{ - recipe.getCombinerRegion().getArgument(i), - recipe.getCombinerRegion().getArgument(i + 1), - recipe.getCombinerRegion().getArgument(i + 2)}); - - llvm::SmallVector lenParamsLeft; + auto shape = genShapeFromBoundsOrArgs( + loc, builder, seqTy, bounds, recipe.getCombinerRegion().getArguments()); + hlfir::DesignateOp::Subscripts triplets = + getSubscriptsFromArgs(recipe.getCombinerRegion().getArguments()); auto leftEntity = hlfir::Entity{value1}; - hlfir::genLengthParameters(loc, builder, leftEntity, lenParamsLeft); - auto leftDesignate = builder.create( - loc, value1.getType(), leftEntity, /*component=*/"", - /*componentShape=*/mlir::Value{}, triplets, - /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt, - shape, lenParamsLeft); - auto left = hlfir::Entity{leftDesignate.getResult()}; - - llvm::SmallVector lenParamsRight; + auto left = + genDesignateWithTriplets(builder, loc, leftEntity, triplets, shape); auto rightEntity = hlfir::Entity{value2}; - hlfir::genLengthParameters(loc, builder, rightEntity, lenParamsRight); - auto rightDesignate = builder.create( - loc, value2.getType(), rightEntity, /*component=*/"", - /*componentShape=*/mlir::Value{}, triplets, - /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt, - shape, lenParamsRight); - auto right = hlfir::Entity{rightDesignate.getResult()}; + auto right = + genDesignateWithTriplets(builder, loc, rightEntity, triplets, shape); llvm::SmallVector typeParams; auto genKernel = [&builder, &loc, op, seqTy, &left, &right]( From 1e8ab993baf4e28590bdac71130f48ebd5f57675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 16 Oct 2023 09:39:22 -0700 Subject: [PATCH 235/720] [flang][openacc] Fix missing bounds for allocatable and 
pointer array component (#68914) Bounds were not gathered correctly for pointer and allocatable array components. This patch fixes the issues pointed in https://reviews.llvm.org/D158732. The change should also enable correct bounds gathering for the OpenMP implementation. A new test file `acc-bounds.f90` is added and bounds specific tests currently in `acc-enter-data.f90` can be moved there in a follow up patch. --- flang/lib/Lower/DirectivesCommon.h | 11 ++- flang/test/Lower/OpenACC/acc-bounds.f90 | 89 +++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/OpenACC/acc-bounds.f90 diff --git a/flang/lib/Lower/DirectivesCommon.h b/flang/lib/Lower/DirectivesCommon.h index 535ec1c03b54d..ed44598bc9252 100644 --- a/flang/lib/Lower/DirectivesCommon.h +++ b/flang/lib/Lower/DirectivesCommon.h @@ -879,8 +879,17 @@ mlir::Value gatherDataOperandAddrAndBounds( builder, operandLocation, converter, compExv, baseAddr); asFortran << (*expr).AsFortran(); + if (auto loadOp = mlir::dyn_cast_or_null( + baseAddr.getDefiningOp())) { + if (fir::isAllocatableType(loadOp.getType()) || + fir::isPointerType(loadOp.getType())) + baseAddr = builder.create(operandLocation, + baseAddr); + } + // If the component is an allocatable or pointer the result of - // genExprAddr will be the result of a fir.box_addr operation. + // genExprAddr will be the result of a fir.box_addr operation or + // a fir.box_addr has been inserted just before. // Retrieve the box so we handle it like other descriptor. if (auto boxAddrOp = mlir::dyn_cast_or_null( baseAddr.getDefiningOp())) { diff --git a/flang/test/Lower/OpenACC/acc-bounds.f90 b/flang/test/Lower/OpenACC/acc-bounds.f90 new file mode 100644 index 0000000000000..c63c9aacf5c2c --- /dev/null +++ b/flang/test/Lower/OpenACC/acc-bounds.f90 @@ -0,0 +1,89 @@ +! This test checks lowering of OpenACC data bounds operation. + +! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefixes=CHECK,FIR +! 
RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s --check-prefixes=CHECK,HLFIR + +module openacc_bounds + +type t1 + integer, pointer, dimension(:) :: array_comp +end type + +type t2 + integer, dimension(10) :: array_comp +end type + +type t3 + integer, allocatable, dimension(:) :: array_comp +end type + +contains + subroutine acc_derived_type_component_pointer_array() + type(t1) :: d + !$acc enter data create(d%array_comp) + end subroutine + +! CHECK-LABEL: func.func @_QMopenacc_boundsPacc_derived_type_component_pointer_array() { +! CHECK: %[[D:.*]] = fir.alloca !fir.type<_QMopenacc_boundsTt1{array_comp:!fir.box>>}> {bindc_name = "d", uniq_name = "_QMopenacc_boundsFacc_derived_type_component_pointer_arrayEd"} +! HLFIR: %[[DECL_D:.*]]:2 = hlfir.declare %[[D]] {uniq_name = "_QMopenacc_boundsFacc_derived_type_component_pointer_arrayEd"} : (!fir.ref>>}>>) -> (!fir.ref>>}>>, !fir.ref>>}>>) +! FIR: %[[FIELD:.*]] = fir.field_index array_comp, !fir.type<_QMopenacc_boundsTt1{array_comp:!fir.box>>}> +! FIR: %[[COORD:.*]] = fir.coordinate_of %[[D]], %[[FIELD]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! HLFIR: %[[COORD:.*]] = hlfir.designate %[[DECL_D]]#0{"array_comp"} {fortran_attrs = #fir.var_attrs} : (!fir.ref>>}>>) -> !fir.ref>>> +! CHECK: %[[LOAD:.*]] = fir.load %[[COORD]] : !fir.ref>>> +! CHECK: %[[BOX_DIMS0:.*]]:3 = fir.box_dims %[[LOAD]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[C1:.*]] = arith.constant 1 : index +! CHECK: %[[BOX_DIMS1:.*]]:3 = fir.box_dims %[[LOAD]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[UB:.*]] = arith.subi %[[BOX_DIMS1]]#1, %[[C1]] : index +! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%[[UB]] : index) stride(%[[BOX_DIMS1]]#2 : index) startIdx(%[[BOX_DIMS0]]#0 : index) {strideInBytes = true} +! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>>) -> !fir.ptr> +! 
CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[BOX_ADDR]] : !fir.ptr>) bounds(%[[BOUND]]) -> !fir.ptr> {name = "d%array_comp", structured = false} +! CHECK: acc.enter_data dataOperands(%[[CREATE]] : !fir.ptr>) +! CHECK: return +! CHECK: } + + subroutine acc_derived_type_component_array() + type(t2) :: d + !$acc enter data create(d%array_comp) + end subroutine + +! CHECK-LABEL: func.func @_QMopenacc_boundsPacc_derived_type_component_array() +! CHECK: %[[D:.*]] = fir.alloca !fir.type<_QMopenacc_boundsTt2{array_comp:!fir.array<10xi32>}> {bindc_name = "d", uniq_name = "_QMopenacc_boundsFacc_derived_type_component_arrayEd"} +! HLFIR: %[[DECL_D:.*]]:2 = hlfir.declare %[[D]] {uniq_name = "_QMopenacc_boundsFacc_derived_type_component_arrayEd"} : (!fir.ref}>>) -> (!fir.ref}>>, !fir.ref}>>) +! FIR: %[[FIELD:.*]] = fir.field_index array_comp, !fir.type<_QMopenacc_boundsTt2{array_comp:!fir.array<10xi32>}> +! FIR: %[[COORD:.*]] = fir.coordinate_of %[[D]], %[[FIELD]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! CHECK: %[[C10:.*]] = arith.constant 10 : index +! HLFIR: %[[SHAPE:.*]] = fir.shape %[[C10]] : (index) -> !fir.shape<1> +! HLFIR: %[[COORD:.*]] = hlfir.designate %[[DECL_D]]#0{"array_comp"} shape %[[SHAPE]] : (!fir.ref}>>, !fir.shape<1>) -> !fir.ref> +! CHECK: %[[C1:.*]] = arith.constant 1 : index +! CHECK: %[[C0:.*]] = arith.constant 0 : index +! CHECK: %[[UB:.*]] = arith.subi %[[C10]], %[[C1]] : index +! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[C0]] : index) upperbound(%[[UB]] : index) extent(%[[C10]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index) +! CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[COORD]] : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {name = "d%array_comp", structured = false} +! CHECK: acc.enter_data dataOperands(%[[CREATE]] : !fir.ref>) +! CHECK: return +! CHECK: } + + subroutine acc_derived_type_component_allocatable_array() + type(t3) :: d + !$acc enter data create(d%array_comp) + end subroutine + +! 
CHECK-LABEL: func.func @_QMopenacc_boundsPacc_derived_type_component_allocatable_array() { +! CHECK: %[[D:.*]] = fir.alloca !fir.type<_QMopenacc_boundsTt3{array_comp:!fir.box>>}> {bindc_name = "d", uniq_name = "_QMopenacc_boundsFacc_derived_type_component_allocatable_arrayEd"} +! HLFIR: %[[DECL_D:.*]]:2 = hlfir.declare %[[D]] {uniq_name = "_QMopenacc_boundsFacc_derived_type_component_allocatable_arrayEd"} : (!fir.ref>>}>>) -> (!fir.ref>>}>>, !fir.ref>>}>>) +! FIR: %[[FIELD:.*]] = fir.field_index array_comp, !fir.type<_QMopenacc_boundsTt3{array_comp:!fir.box>>}> +! FIR: %[[COORD:.*]] = fir.coordinate_of %[[D]], %[[FIELD]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! HLFIR: %[[COORD:.*]] = hlfir.designate %[[DECL_D]]#0{"array_comp"} {fortran_attrs = #fir.var_attrs} : (!fir.ref>>}>>) -> !fir.ref>>> +! CHECK: %[[LOAD:.*]] = fir.load %[[COORD]] : !fir.ref>>> +! CHECK: %[[BOX_DIMS0:.*]]:3 = fir.box_dims %[[LOAD]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[C1:.*]] = arith.constant 1 : index +! CHECK: %[[BOX_DIMS1:.*]]:3 = fir.box_dims %[[LOAD]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[UB:.*]] = arith.subi %[[BOX_DIMS1]]#1, %[[C1]] : index +! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%[[UB]] : index) stride(%[[BOX_DIMS1]]#2 : index) startIdx(%[[BOX_DIMS0]]#0 : index) {strideInBytes = true} +! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>>) -> !fir.heap> +! CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[BOX_ADDR]] : !fir.heap>) bounds(%[[BOUND]]) -> !fir.heap> {name = "d%array_comp", structured = false} +! CHECK: acc.enter_data dataOperands(%[[CREATE]] : !fir.heap>) +! CHECK: return +! CHECK: } + +end module From 182a65adcf8af922246cac80ea6f3fdb159cd89e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 16 Oct 2023 12:56:32 -0400 Subject: [PATCH 236/720] [RISCV] Refactor performCONCAT_VECTORSCombine. 
NFC (#69068) Instead of doing a forward pass for positive strides and a reverse pass for negative strides, we can just do one pass by negating the offset if the pointers do happen to be in reverse order. We can extend getPtrDiff later in #68726 to handle more constant offset sequences. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 92 ++++++++------------- 1 file changed, 34 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ed1f7b6c50a4d..6eb253cc51466 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -13785,11 +13785,10 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT BaseLdVT = BaseLd->getValueType(0); - SDValue BasePtr = BaseLd->getBasePtr(); // Go through the loads and check that they're strided - SmallVector Ptrs; - Ptrs.push_back(BasePtr); + SmallVector Lds; + Lds.push_back(BaseLd); Align Align = BaseLd->getAlign(); for (SDValue Op : N->ops().drop_front()) { auto *Ld = dyn_cast(Op); @@ -13798,60 +13797,38 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, Ld->getValueType(0) != BaseLdVT) return SDValue(); - Ptrs.push_back(Ld->getBasePtr()); + Lds.push_back(Ld); // The common alignment is the most restrictive (smallest) of all the loads Align = std::min(Align, Ld->getAlign()); } - auto matchForwardStrided = [](ArrayRef Ptrs) { - SDValue Stride; - for (auto Idx : enumerate(Ptrs)) { - if (Idx.index() == 0) - continue; - SDValue Ptr = Idx.value(); - // Check that each load's pointer is (add LastPtr, Stride) - if (Ptr.getOpcode() != ISD::ADD || - Ptr.getOperand(0) != Ptrs[Idx.index()-1]) - return SDValue(); - SDValue Offset = Ptr.getOperand(1); - if (!Stride) - Stride = Offset; - else if (Offset != Stride) - return SDValue(); - } - return Stride; - }; - auto matchReverseStrided = [](ArrayRef Ptrs) { - SDValue Stride; - for (auto Idx : 
enumerate(Ptrs)) { - if (Idx.index() == Ptrs.size() - 1) - continue; - SDValue Ptr = Idx.value(); - // Check that each load's pointer is (add NextPtr, Stride) - if (Ptr.getOpcode() != ISD::ADD || - Ptr.getOperand(0) != Ptrs[Idx.index()+1]) - return SDValue(); - SDValue Offset = Ptr.getOperand(1); - if (!Stride) - Stride = Offset; - else if (Offset != Stride) - return SDValue(); - } - return Stride; + using PtrDiff = std::pair; + auto GetPtrDiff = [](LoadSDNode *Ld1, + LoadSDNode *Ld2) -> std::optional { + SDValue P1 = Ld1->getBasePtr(); + SDValue P2 = Ld2->getBasePtr(); + if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1) + return {{P2.getOperand(1), false}}; + if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2) + return {{P1.getOperand(1), true}}; + + return std::nullopt; }; - bool Reversed = false; - SDValue Stride = matchForwardStrided(Ptrs); - if (!Stride) { - Stride = matchReverseStrided(Ptrs); - Reversed = true; - // TODO: At this point, we've successfully matched a generalized gather - // load. Maybe we should emit that, and then move the specialized - // matchers above and below into a DAG combine? - if (!Stride) + // Get the distance between the first and second loads + auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]); + if (!BaseDiff) + return SDValue(); + + // Check all the loads are the same distance apart + for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++) + if (GetPtrDiff(*It, *std::next(It)) != BaseDiff) return SDValue(); - } + + // TODO: At this point, we've successfully matched a generalized gather + // load. Maybe we should emit that, and then move the specialized + // matchers above and below into a DAG combine? // Get the widened scalar type, e.g. 
v4i8 -> i64 unsigned WideScalarBitWidth = @@ -13867,26 +13844,25 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, if (!TLI.isLegalStridedLoadStore(WideVecVT, Align)) return SDValue(); + auto [Stride, MustNegateStride] = *BaseDiff; + if (MustNegateStride) + Stride = DAG.getNegative(Stride, DL, Stride.getValueType()); + SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other}); SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, Subtarget.getXLenVT()); - if (Reversed) - Stride = DAG.getNegative(Stride, DL, Stride->getValueType(0)); + SDValue AllOneMask = DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL, DAG.getConstant(1, DL, MVT::i1)); - SDValue Ops[] = {BaseLd->getChain(), - IntID, - DAG.getUNDEF(WideVecVT), - BasePtr, - Stride, - AllOneMask}; + SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT), + BaseLd->getBasePtr(), Stride, AllOneMask}; uint64_t MemSize; if (auto *ConstStride = dyn_cast(Stride); - ConstStride && !Reversed && ConstStride->getSExtValue() >= 0) + ConstStride && ConstStride->getSExtValue() >= 0) // total size = (elsize * n) + (stride - elsize) * (n-1) // = elsize + stride * (n-1) MemSize = WideScalarVT.getSizeInBits() + From d392073f6747e4c522d6c6a3c49eb42859312034 Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Mon, 16 Oct 2023 10:25:37 -0700 Subject: [PATCH 237/720] [mlir][sparse] simplify reader construction of new sparse tensor (#69036) Making the materialize-from-reader method part of the Swiss army knife suite again removes a lot of redundant boiler plate code and unifies the parameter setup into a single centralized utility. 
Furthermore, we now have minimized the number of entry points into the library that need a non-permutation map setup, simplifying what comes next --- .../mlir/Dialect/SparseTensor/IR/Enums.h | 1 + .../ExecutionEngine/SparseTensorRuntime.h | 25 ---- .../Transforms/SparseTensorConversion.cpp | 33 ++--- .../ExecutionEngine/SparseTensorRuntime.cpp | 137 +----------------- .../test/Dialect/SparseTensor/conversion.mlir | 30 ++-- 5 files changed, 31 insertions(+), 195 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index 1434c649acd29..0caf83a63b531 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -146,6 +146,7 @@ enum class Action : uint32_t { kEmptyForward = 1, kFromCOO = 2, kSparseToSparse = 3, + kFromReader = 4, kToCOO = 5, kPack = 7, kSortCOOInPlace = 8, diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index e8dd50d6730c7..a470afc2f0c8c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -115,16 +115,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_createCheckedSparseTensorReader( char *filename, StridedMemRefType *dimShapeRef, PrimaryType valTp); -/// Constructs a new sparse-tensor storage object with the given encoding, -/// initializes it by reading all the elements from the file, and then -/// closes the file. -MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( - void *p, StridedMemRefType *lvlSizesRef, - StridedMemRefType *lvlTypesRef, - StridedMemRefType *dim2lvlRef, - StridedMemRefType *lvl2dimRef, OverheadType posTp, - OverheadType crdTp, PrimaryType valTp); - /// SparseTensorReader method to obtain direct access to the /// dimension-sizes array. 
MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( @@ -197,24 +187,9 @@ MLIR_SPARSETENSOR_FOREVERY_V(DECL_DELCOO) /// defined with the naming convention ${TENSOR0}, ${TENSOR1}, etc. MLIR_CRUNNERUTILS_EXPORT char *getTensorFilename(index_type id); -/// Helper function to read the header of a file and return the -/// shape/sizes, without parsing the elements of the file. -MLIR_CRUNNERUTILS_EXPORT void readSparseTensorShape(char *filename, - std::vector *out); - -/// Returns the rank of the sparse tensor being read. -MLIR_CRUNNERUTILS_EXPORT index_type getSparseTensorReaderRank(void *p); - -/// Returns the is_symmetric bit for the sparse tensor being read. -MLIR_CRUNNERUTILS_EXPORT bool getSparseTensorReaderIsSymmetric(void *p); - /// Returns the number of stored elements for the sparse tensor being read. MLIR_CRUNNERUTILS_EXPORT index_type getSparseTensorReaderNSE(void *p); -/// Returns the size of a dimension for the sparse tensor being read. -MLIR_CRUNNERUTILS_EXPORT index_type getSparseTensorReaderDimSize(void *p, - index_type d); - /// Releases the SparseTensorReader and closes the associated file. MLIR_CRUNNERUTILS_EXPORT void delSparseTensorReader(void *p); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a76f81410aa87..73f5e3eeb7d51 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -199,12 +199,15 @@ class NewCallParams final { /// type-level information such as the encoding and sizes), generating /// MLIR buffers as needed, and returning `this` for method chaining. NewCallParams &genBuffers(SparseTensorType stt, - ArrayRef dimSizesValues) { + ArrayRef dimSizesValues, + Value dimSizesBuffer = Value()) { assert(dimSizesValues.size() == static_cast(stt.getDimRank())); // Sparsity annotations. 
params[kParamLvlTypes] = genLvlTypesBuffer(builder, loc, stt); // Construct dimSizes, lvlSizes, dim2lvl, and lvl2dim buffers. - params[kParamDimSizes] = allocaBuffer(builder, loc, dimSizesValues); + params[kParamDimSizes] = dimSizesBuffer + ? dimSizesBuffer + : allocaBuffer(builder, loc, dimSizesValues); params[kParamLvlSizes] = genMapBuffers(builder, loc, stt, dimSizesValues, params[kParamDimSizes], params[kParamDim2Lvl], params[kParamLvl2Dim]); @@ -342,33 +345,15 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - // Construct the reader opening method calls. + // Construct the `reader` opening method calls. SmallVector dimShapesValues; Value dimSizesBuffer; Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], dimShapesValues, dimSizesBuffer); - // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. - Value dim2lvlBuffer; - Value lvl2dimBuffer; - Value lvlSizesBuffer = - genMapBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, - dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. - Type opaqueTp = getOpaquePointerType(rewriter); - Type eltTp = stt.getElementType(); - Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); - SmallVector params{ - reader, - lvlSizesBuffer, - genLvlTypesBuffer(rewriter, loc, stt), - dim2lvlBuffer, - lvl2dimBuffer, - constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), - constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), - valTp}; - Value tensor = createFuncCall(rewriter, loc, "newSparseTensorFromReader", - opaqueTp, params, EmitCInterface::On) - .getResult(0); + Value tensor = NewCallParams(rewriter, loc) + .genBuffers(stt, dimShapesValues, dimSizesBuffer) + .genNewCall(Action::kFromReader, reader); // Free the memory for `reader`. 
createFuncCall(rewriter, loc, "delSparseTensorReader", {}, {reader}, EmitCInterface::Off); diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index ae33a869497a0..74ab65c143d63 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -138,6 +138,12 @@ extern "C" { dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, \ dimRank, tensor); \ } \ + case Action::kFromReader: { \ + assert(ptr && "Received nullptr for SparseTensorReader object"); \ + SparseTensorReader &reader = *static_cast(ptr); \ + return static_cast(reader.readSparseTensor( \ + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); \ + } \ case Action::kToCOO: { \ assert(ptr && "Received nullptr for SparseTensorStorage object"); \ auto &tensor = *static_cast *>(ptr); \ @@ -442,113 +448,6 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT -void *_mlir_ciface_newSparseTensorFromReader( - void *p, StridedMemRefType *lvlSizesRef, - StridedMemRefType *lvlTypesRef, - StridedMemRefType *dim2lvlRef, - StridedMemRefType *lvl2dimRef, OverheadType posTp, - OverheadType crdTp, PrimaryType valTp) { - assert(p); - SparseTensorReader &reader = *static_cast(p); - ASSERT_NO_STRIDE(lvlSizesRef); - ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(dim2lvlRef); - ASSERT_NO_STRIDE(lvl2dimRef); - const uint64_t dimRank = reader.getRank(); - const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); - ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); - (void)dimRank; - const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); - const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); -#define CASE(p, c, v, P, C, V) \ - if (posTp == 
OverheadType::p && crdTp == OverheadType::c && \ - valTp == PrimaryType::v) \ - return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); -#define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) - // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. - // This is safe because of the static_assert above. - if (posTp == OverheadType::kIndex) - posTp = OverheadType::kU64; - if (crdTp == OverheadType::kIndex) - crdTp = OverheadType::kU64; - // Double matrices with all combinations of overhead storage. - CASE(kU64, kU64, kF64, uint64_t, uint64_t, double); - CASE(kU64, kU32, kF64, uint64_t, uint32_t, double); - CASE(kU64, kU16, kF64, uint64_t, uint16_t, double); - CASE(kU64, kU8, kF64, uint64_t, uint8_t, double); - CASE(kU32, kU64, kF64, uint32_t, uint64_t, double); - CASE(kU32, kU32, kF64, uint32_t, uint32_t, double); - CASE(kU32, kU16, kF64, uint32_t, uint16_t, double); - CASE(kU32, kU8, kF64, uint32_t, uint8_t, double); - CASE(kU16, kU64, kF64, uint16_t, uint64_t, double); - CASE(kU16, kU32, kF64, uint16_t, uint32_t, double); - CASE(kU16, kU16, kF64, uint16_t, uint16_t, double); - CASE(kU16, kU8, kF64, uint16_t, uint8_t, double); - CASE(kU8, kU64, kF64, uint8_t, uint64_t, double); - CASE(kU8, kU32, kF64, uint8_t, uint32_t, double); - CASE(kU8, kU16, kF64, uint8_t, uint16_t, double); - CASE(kU8, kU8, kF64, uint8_t, uint8_t, double); - // Float matrices with all combinations of overhead storage. 
- CASE(kU64, kU64, kF32, uint64_t, uint64_t, float); - CASE(kU64, kU32, kF32, uint64_t, uint32_t, float); - CASE(kU64, kU16, kF32, uint64_t, uint16_t, float); - CASE(kU64, kU8, kF32, uint64_t, uint8_t, float); - CASE(kU32, kU64, kF32, uint32_t, uint64_t, float); - CASE(kU32, kU32, kF32, uint32_t, uint32_t, float); - CASE(kU32, kU16, kF32, uint32_t, uint16_t, float); - CASE(kU32, kU8, kF32, uint32_t, uint8_t, float); - CASE(kU16, kU64, kF32, uint16_t, uint64_t, float); - CASE(kU16, kU32, kF32, uint16_t, uint32_t, float); - CASE(kU16, kU16, kF32, uint16_t, uint16_t, float); - CASE(kU16, kU8, kF32, uint16_t, uint8_t, float); - CASE(kU8, kU64, kF32, uint8_t, uint64_t, float); - CASE(kU8, kU32, kF32, uint8_t, uint32_t, float); - CASE(kU8, kU16, kF32, uint8_t, uint16_t, float); - CASE(kU8, kU8, kF32, uint8_t, uint8_t, float); - // Two-byte floats with both overheads of the same type. - CASE_SECSAME(kU64, kF16, uint64_t, f16); - CASE_SECSAME(kU64, kBF16, uint64_t, bf16); - CASE_SECSAME(kU32, kF16, uint32_t, f16); - CASE_SECSAME(kU32, kBF16, uint32_t, bf16); - CASE_SECSAME(kU16, kF16, uint16_t, f16); - CASE_SECSAME(kU16, kBF16, uint16_t, bf16); - CASE_SECSAME(kU8, kF16, uint8_t, f16); - CASE_SECSAME(kU8, kBF16, uint8_t, bf16); - // Integral matrices with both overheads of the same type. 
- CASE_SECSAME(kU64, kI64, uint64_t, int64_t); - CASE_SECSAME(kU64, kI32, uint64_t, int32_t); - CASE_SECSAME(kU64, kI16, uint64_t, int16_t); - CASE_SECSAME(kU64, kI8, uint64_t, int8_t); - CASE_SECSAME(kU32, kI64, uint32_t, int64_t); - CASE_SECSAME(kU32, kI32, uint32_t, int32_t); - CASE_SECSAME(kU32, kI16, uint32_t, int16_t); - CASE_SECSAME(kU32, kI8, uint32_t, int8_t); - CASE_SECSAME(kU16, kI64, uint16_t, int64_t); - CASE_SECSAME(kU16, kI32, uint16_t, int32_t); - CASE_SECSAME(kU16, kI16, uint16_t, int16_t); - CASE_SECSAME(kU16, kI8, uint16_t, int8_t); - CASE_SECSAME(kU8, kI64, uint8_t, int64_t); - CASE_SECSAME(kU8, kI32, uint8_t, int32_t); - CASE_SECSAME(kU8, kI16, uint8_t, int16_t); - CASE_SECSAME(kU8, kI8, uint8_t, int8_t); - // Complex matrices with wide overhead. - CASE_SECSAME(kU64, kC64, uint64_t, complex64); - CASE_SECSAME(kU64, kC32, uint64_t, complex32); - - // Unsupported case (add above if needed). - MLIR_SPARSETENSOR_FATAL( - "unsupported combination of types: \n", - static_cast(posTp), static_cast(crdTp), - static_cast(valTp)); -#undef CASE_SECSAME -#undef CASE -} - void _mlir_ciface_outSparseTensorWriterMetaData( void *p, index_type dimRank, index_type nse, StridedMemRefType *dimSizesRef) { @@ -635,34 +534,10 @@ char *getTensorFilename(index_type id) { return env; } -void readSparseTensorShape(char *filename, std::vector *out) { - assert(out && "Received nullptr for out-parameter"); - SparseTensorReader reader(filename); - reader.openFile(); - reader.readHeader(); - reader.closeFile(); - const uint64_t dimRank = reader.getRank(); - const uint64_t *dimSizes = reader.getDimSizes(); - out->reserve(dimRank); - out->assign(dimSizes, dimSizes + dimRank); -} - -index_type getSparseTensorReaderRank(void *p) { - return static_cast(p)->getRank(); -} - -bool getSparseTensorReaderIsSymmetric(void *p) { - return static_cast(p)->isSymmetric(); -} - index_type getSparseTensorReaderNSE(void *p) { return static_cast(p)->getNSE(); } -index_type 
getSparseTensorReaderDimSize(void *p, index_type d) { - return static_cast(p)->getDimSize(d); -} - void delSparseTensorReader(void *p) { delete static_cast(p); } diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 96300a98a6a4b..2ff4887dae7b8 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -78,11 +78,11 @@ func.func @sparse_dim3d_const(%arg0: tensor<10x20x30xf64, #SparseTensor>) -> ind // CHECK-DAG: %[[DimShape0:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<1xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) -// CHECK-DAG: %[[Iota0:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<1xindex> to memref // CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<1xi8> // CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<1xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[DimShape]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK-DAG: %[[Iota0:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<1xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimShape]], %[[DimShape]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[Reader]]) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new1d(%arg0: !llvm.ptr) -> tensor<128xf64, #SparseVector> { @@ -96,11 +96,11 @@ func.func @sparse_new1d(%arg0: !llvm.ptr) -> tensor<128xf64, #SparseVector> // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<2xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = 
call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[Iota0:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<2xindex> to memref // CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<2xi8> // CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<2xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[DimSizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK-DAG: %[[Iota0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<2xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[DimSizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[Reader]]) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], 
%{{.*}}, %{{.*}}, %{{.*}}) +// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[Reader]]) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { From 5e4ec53b8efaa2a5215dd68f970d3c913ce07a20 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 16 Oct 2023 10:41:20 -0700 Subject: [PATCH 238/720] [llc][PPC] Move PIC check into TargetMachine (#66727) Matches other code like the code model checking. 
--- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 5 +++-- llvm/test/tools/llc/aix-pic-setting.ll | 2 +- llvm/tools/llc/llc.cpp | 14 -------------- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 42f052cb15d5c..b09975172bf5e 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -265,8 +265,9 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, static Reloc::Model getEffectiveRelocModel(const Triple &TT, std::optional RM) { - assert((!TT.isOSAIX() || !RM || *RM == Reloc::PIC_) && - "Invalid relocation model for AIX."); + if (TT.isOSAIX() && RM && *RM != Reloc::PIC_) + report_fatal_error("invalid relocation model, AIX only supports PIC", + false); if (RM) return *RM; diff --git a/llvm/test/tools/llc/aix-pic-setting.ll b/llvm/test/tools/llc/aix-pic-setting.ll index 70e08e2513eeb..3537baf1cdebe 100644 --- a/llvm/test/tools/llc/aix-pic-setting.ll +++ b/llvm/test/tools/llc/aix-pic-setting.ll @@ -6,4 +6,4 @@ ; RUN: not llc -mtriple=powerpc64-ibm-aix --relocation-model=ropi-rwpi < %s 2>&1 | FileCheck --check-prefix=CHECK-NON-PIC %s ; CHECK-NOT: {{.}} -; CHECK-NON-PIC: error: '': invalid relocation model, AIX only supports PIC +; CHECK-NON-PIC: invalid relocation model, AIX only supports PIC diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 0ca06cda20b6e..0b174afc22ddc 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -558,12 +558,6 @@ static int compileModule(char **argv, LLVMContext &Context) { exit(1); } - // On AIX, setting the relocation model to anything other than PIC is - // considered a user error. 
- if (TheTriple.isOSAIX() && RM && *RM != Reloc::PIC_) - reportError("invalid relocation model, AIX only supports PIC", - InputFilename); - InitializeOptions(TheTriple); Target = std::unique_ptr(TheTarget->createTargetMachine( TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM, CM, OLvl)); @@ -607,14 +601,6 @@ static int compileModule(char **argv, LLVMContext &Context) { return 1; } - // On AIX, setting the relocation model to anything other than PIC is - // considered a user error. - if (TheTriple.isOSAIX() && RM && *RM != Reloc::PIC_) { - WithColor::error(errs(), argv[0]) - << "invalid relocation model, AIX only supports PIC.\n"; - return 1; - } - InitializeOptions(TheTriple); Target = std::unique_ptr(TheTarget->createTargetMachine( TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM, CM, OLvl)); From 5fab20bc7e5513d197e19cee8ce4e2706b7dd3b3 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 16 Oct 2023 10:42:22 -0700 Subject: [PATCH 239/720] [NFC] Move StableHashing.h from CodeGen to ADT (#67704) --- llvm/include/llvm/{CodeGen => ADT}/StableHashing.h | 6 +++--- llvm/include/llvm/CodeGen/MachineStableHash.h | 2 +- llvm/lib/CodeGen/MachineOperand.cpp | 2 +- llvm/lib/CodeGen/MachineStableHash.cpp | 2 +- llvm/lib/Passes/StandardInstrumentations.cpp | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) rename llvm/include/llvm/{CodeGen => ADT}/StableHashing.h (95%) diff --git a/llvm/include/llvm/CodeGen/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h similarity index 95% rename from llvm/include/llvm/CodeGen/StableHashing.h rename to llvm/include/llvm/ADT/StableHashing.h index caf27e152e78f..884b5752d9bb0 100644 --- a/llvm/include/llvm/CodeGen/StableHashing.h +++ b/llvm/include/llvm/ADT/StableHashing.h @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/StableHashing.h - Utilities for stable hashing * C++ *-===// +//===- llvm/ADT/StableHashing.h - Utilities for stable hashing * C++ *-----===// // // Part of the LLVM Project, under the Apache License v2.0 
with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_STABLEHASHING_H -#define LLVM_CODEGEN_STABLEHASHING_H +#ifndef LLVM_ADT_STABLEHASHING_H +#define LLVM_ADT_STABLEHASHING_H #include "llvm/ADT/StringRef.h" diff --git a/llvm/include/llvm/CodeGen/MachineStableHash.h b/llvm/include/llvm/CodeGen/MachineStableHash.h index 43571b7b8afd2..743615d136aef 100644 --- a/llvm/include/llvm/CodeGen/MachineStableHash.h +++ b/llvm/include/llvm/CodeGen/MachineStableHash.h @@ -14,7 +14,7 @@ #ifndef LLVM_CODEGEN_MACHINESTABLEHASH_H #define LLVM_CODEGEN_MACHINESTABLEHASH_H -#include "llvm/CodeGen/StableHashing.h" +#include "llvm/ADT/StableHashing.h" namespace llvm { class MachineBasicBlock; diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 788c134b6ee84..b6d6a7532d340 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -11,13 +11,13 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/ADT/StableHashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/StableHashing.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index 9628e4c5aeb5d..8fb9a6bfd86a6 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include 
"llvm/ADT/StableHashing.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/ilist_iterator.h" #include "llvm/ADT/iterator_range.h" @@ -30,7 +31,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Register.h" -#include "llvm/CodeGen/StableHashing.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/MC/MCSymbol.h" diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 879a5bd805363..95920305f9830 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -14,11 +14,11 @@ #include "llvm/Passes/StandardInstrumentations.h" #include "llvm/ADT/Any.h" +#include "llvm/ADT/StableHashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/CodeGen/StableHashing.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" From fcc13c04bbd1489e016a3910eefcefb596001249 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 16 Oct 2023 19:48:01 +0200 Subject: [PATCH 240/720] [libc++] Simplify the tuple constructor overload set This uses conditional explicit to avoid having two overloads for implicit/explicit conversions. 
Reviewed By: ldionne, #libc Spies: jrtc27, dblaikie, #clang-vendors, #libc_vendors, aaron.ballman, libcxx-commits Differential Revision: https://reviews.llvm.org/D148432 --- libcxx/include/tuple | 257 ++++++------------------------------------- 1 file changed, 34 insertions(+), 223 deletions(-) diff --git a/libcxx/include/tuple b/libcxx/include/tuple index e7fc1e28fb6e0..138c132ff15ae 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -592,51 +592,31 @@ class _LIBCPP_TEMPLATE_VIS tuple public: // [tuple.cnstr] - // tuple() constructors (including allocator_arg_t variants) - template class _IsImpDefault = __is_implicitly_default_constructible, __enable_if_t< - _And< - _IsImpDefault<_Tp>... // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR - tuple() - _NOEXCEPT_(_And...>::value) - { } +_LIBCPP_DIAGNOSTIC_PUSH +_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++20-extensions") +_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++20-extensions") + // tuple() constructors (including allocator_arg_t variants) template class _IsImpDefault = __is_implicitly_default_constructible, template class _IsDefault = is_default_constructible, __enable_if_t< _And< - _IsDefault<_Tp>..., - _Not<_Lazy<_And, _IsImpDefault<_Tp>...> > // explicit check + _IsDefault<_Tp>... >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR - explicit tuple() + explicit(_Not<_Lazy<_And, _IsImpDefault<_Tp>...> >::value) tuple() _NOEXCEPT_(_And...>::value) { } - template class _IsImpDefault = __is_implicitly_default_constructible, __enable_if_t< - _And< - _IsImpDefault<_Tp>... 
// explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - tuple(allocator_arg_t, _Alloc const& __a) - : __base_(allocator_arg_t(), __a, - __tuple_indices<>(), __tuple_types<>(), - typename __make_tuple_indices::type(), - __tuple_types<_Tp...>()) {} - template class _IsImpDefault = __is_implicitly_default_constructible, template class _IsDefault = is_default_constructible, __enable_if_t< _And< - _IsDefault<_Tp>..., - _Not<_Lazy<_And, _IsImpDefault<_Tp>...> > // explicit check + _IsDefault<_Tp>... >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - explicit tuple(allocator_arg_t, _Alloc const& __a) + explicit(_Not<_Lazy<_And, _IsImpDefault<_Tp>...> >::value) tuple(allocator_arg_t, _Alloc const& __a) : __base_(allocator_arg_t(), __a, __tuple_indices<>(), __tuple_types<>(), typename __make_tuple_indices::type(), @@ -646,29 +626,11 @@ public: template class _And = _And, __enable_if_t< _And< _BoolConstant= 1>, - is_copy_constructible<_Tp>..., - is_convertible... // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - tuple(const _Tp& ... __t) - _NOEXCEPT_(_And...>::value) - : __base_(typename __make_tuple_indices::type(), - typename __make_tuple_types::type(), - typename __make_tuple_indices<0>::type(), - typename __make_tuple_types::type(), - __t... - ) {} - - template class _And = _And, __enable_if_t< - _And< - _BoolConstant= 1>, - is_copy_constructible<_Tp>..., - _Not<_Lazy<_And, is_convertible...> > // explicit check + is_copy_constructible<_Tp>... >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - explicit tuple(const _Tp& ... __t) + explicit(_Not<_Lazy<_And, is_convertible...> >::value) tuple(const _Tp& ... 
__t) _NOEXCEPT_(_And...>::value) : __base_(typename __make_tuple_indices::type(), typename __make_tuple_types::type(), @@ -680,29 +642,11 @@ public: template class _And = _And, __enable_if_t< _And< _BoolConstant= 1>, - is_copy_constructible<_Tp>..., - is_convertible... // explicit check + is_copy_constructible<_Tp>... >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - tuple(allocator_arg_t, const _Alloc& __a, const _Tp& ... __t) - : __base_(allocator_arg_t(), __a, - typename __make_tuple_indices::type(), - typename __make_tuple_types::type(), - typename __make_tuple_indices<0>::type(), - typename __make_tuple_types::type(), - __t... - ) {} - - template class _And = _And, __enable_if_t< - _And< - _BoolConstant= 1>, - is_copy_constructible<_Tp>..., - _Not<_Lazy<_And, is_convertible...> > // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - explicit tuple(allocator_arg_t, const _Alloc& __a, const _Tp& ... __t) + explicit(_Not<_Lazy<_And, is_convertible...> >::value) tuple(allocator_arg_t, const _Alloc& __a, const _Tp& ... __t) : __base_(allocator_arg_t(), __a, typename __make_tuple_indices::type(), typename __make_tuple_types::type(), @@ -725,12 +669,11 @@ public: template , - _EnableUTypesCtor<_Up...>, - is_convertible<_Up, _Tp>... // explicit check + _EnableUTypesCtor<_Up...> >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - tuple(_Up&&... __u) + explicit(_Not<_Lazy<_And, is_convertible<_Up, _Tp>...> >::value) tuple(_Up&&... __u) _NOEXCEPT_((_And...>::value)) : __base_(typename __make_tuple_indices::type(), typename __make_tuple_types::type(), @@ -738,47 +681,14 @@ public: typename __make_tuple_types::type(), _VSTD::forward<_Up>(__u)...) {} - template , - _EnableUTypesCtor<_Up...>, - _Not<_Lazy<_And, is_convertible<_Up, _Tp>...> > // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - explicit tuple(_Up&&... 
__u) - _NOEXCEPT_((_And...>::value)) - : __base_(typename __make_tuple_indices::type(), - typename __make_tuple_types::type(), - typename __make_tuple_indices::type(), - typename __make_tuple_types::type(), - _VSTD::forward<_Up>(__u)...) {} - - template , - _EnableUTypesCtor<_Up...>, - is_convertible<_Up, _Tp>... // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - tuple(allocator_arg_t, const _Alloc& __a, _Up&&... __u) - : __base_(allocator_arg_t(), __a, - typename __make_tuple_indices::type(), - typename __make_tuple_types::type(), - typename __make_tuple_indices::type(), - typename __make_tuple_types::type(), - _VSTD::forward<_Up>(__u)...) {} - template , - _EnableUTypesCtor<_Up...>, - _Not<_Lazy<_And, is_convertible<_Up, _Tp>...> > // explicit check + _EnableUTypesCtor<_Up...> >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - explicit tuple(allocator_arg_t, const _Alloc& __a, _Up&&... __u) + explicit(_Not<_Lazy<_And, is_convertible<_Up, _Tp>...> >::value) tuple(allocator_arg_t, const _Alloc& __a, _Up&&... __u) : __base_(allocator_arg_t(), __a, typename __make_tuple_indices::type(), typename __make_tuple_types::type(), @@ -833,47 +743,22 @@ public: template &>, - is_convertible... // explicit check + _EnableCtorFromUTypesTuple&> >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - tuple(const tuple<_Up...>& __t) - _NOEXCEPT_((_And...>::value)) - : __base_(__t) - { } - - template &>, - _Not<_Lazy<_And, is_convertible...> > // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - explicit tuple(const tuple<_Up...>& __t) + explicit(_Not<_Lazy<_And, is_convertible...> >::value) tuple(const tuple<_Up...>& __t) _NOEXCEPT_((_And...>::value)) : __base_(__t) { } template &>, - is_convertible... 
// explicit check + _EnableCtorFromUTypesTuple&> >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - tuple(allocator_arg_t, const _Alloc& __a, const tuple<_Up...>& __t) - : __base_(allocator_arg_t(), __a, __t) - { } - - template &>, - _Not<_Lazy<_And, is_convertible...> > // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - explicit tuple(allocator_arg_t, const _Alloc& __a, const tuple<_Up...>& __t) + explicit(_Not<_Lazy<_And, is_convertible...> >::value) tuple(allocator_arg_t, const _Alloc& __a, const tuple<_Up...>& __t) : __base_(allocator_arg_t(), __a, __t) { } @@ -894,50 +779,24 @@ public: #endif // _LIBCPP_STD_VER >= 23 // tuple(tuple&&) constructors (including allocator_arg_t variants) - template &&>, - is_convertible<_Up, _Tp>... // explicit check + _EnableCtorFromUTypesTuple&&> >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - tuple(tuple<_Up...>&& __t) + explicit(_Not<_Lazy<_And, is_convertible<_Up, _Tp>...> >::value) tuple(tuple<_Up...>&& __t) _NOEXCEPT_((_And...>::value)) : __base_(_VSTD::move(__t)) { } - template &&>, - _Not<_Lazy<_And, is_convertible<_Up, _Tp>...> > // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - explicit tuple(tuple<_Up...>&& __t) - _NOEXCEPT_((_And...>::value)) - : __base_(_VSTD::move(__t)) - { } - - template &&>, - is_convertible<_Up, _Tp>... 
// explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - tuple(allocator_arg_t, const _Alloc& __a, tuple<_Up...>&& __t) - : __base_(allocator_arg_t(), __a, _VSTD::move(__t)) - { } - template &&>, - _Not<_Lazy<_And, is_convertible<_Up, _Tp>...> > // explicit check + _EnableCtorFromUTypesTuple&&> >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - explicit tuple(allocator_arg_t, const _Alloc& __a, tuple<_Up...>&& __t) + explicit(_Not<_Lazy<_And, is_convertible<_Up, _Tp>...> >::value) tuple(allocator_arg_t, const _Alloc& __a, tuple<_Up...>&& __t) : __base_(allocator_arg_t(), __a, _VSTD::move(__t)) { } @@ -986,47 +845,22 @@ public: template class _And = _And, __enable_if_t< _And< - _EnableCtorFromPair&>, - _BothImplicitlyConvertible&> // explicit check + _EnableCtorFromPair&> >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - tuple(const pair<_Up1, _Up2>& __p) - _NOEXCEPT_((_NothrowConstructibleFromPair&>::value)) - : __base_(__p) - { } - - template class _And = _And, __enable_if_t< - _And< - _EnableCtorFromPair&>, - _Not<_BothImplicitlyConvertible&> > // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - explicit tuple(const pair<_Up1, _Up2>& __p) + explicit(_Not<_BothImplicitlyConvertible&> >::value) tuple(const pair<_Up1, _Up2>& __p) _NOEXCEPT_((_NothrowConstructibleFromPair&>::value)) : __base_(__p) { } template class _And = _And, __enable_if_t< _And< - _EnableCtorFromPair&>, - _BothImplicitlyConvertible&> // explicit check + _EnableCtorFromPair&> >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - tuple(allocator_arg_t, const _Alloc& __a, const pair<_Up1, _Up2>& __p) - : __base_(allocator_arg_t(), __a, __p) - { } - - template class _And = _And, __enable_if_t< - _And< - _EnableCtorFromPair&>, - _Not<_BothImplicitlyConvertible&> > // explicit check - >::value - , int> = 0> - 
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - explicit tuple(allocator_arg_t, const _Alloc& __a, const pair<_Up1, _Up2>& __p) + explicit(_Not<_BothImplicitlyConvertible&> >::value) tuple(allocator_arg_t, const _Alloc& __a, const pair<_Up1, _Up2>& __p) : __base_(allocator_arg_t(), __a, __p) { } @@ -1050,47 +884,22 @@ public: template class _And = _And, __enable_if_t< _And< - _EnableCtorFromPair&&>, - _BothImplicitlyConvertible&&> // explicit check + _EnableCtorFromPair&&> >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - tuple(pair<_Up1, _Up2>&& __p) - _NOEXCEPT_((_NothrowConstructibleFromPair&&>::value)) - : __base_(_VSTD::move(__p)) - { } - - template class _And = _And, __enable_if_t< - _And< - _EnableCtorFromPair&&>, - _Not<_BothImplicitlyConvertible&&> > // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX14 - explicit tuple(pair<_Up1, _Up2>&& __p) + explicit(_Not<_BothImplicitlyConvertible&&> >::value) tuple(pair<_Up1, _Up2>&& __p) _NOEXCEPT_((_NothrowConstructibleFromPair&&>::value)) : __base_(_VSTD::move(__p)) { } template class _And = _And, __enable_if_t< _And< - _EnableCtorFromPair&&>, - _BothImplicitlyConvertible&&> // explicit check + _EnableCtorFromPair&&> >::value , int> = 0> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - tuple(allocator_arg_t, const _Alloc& __a, pair<_Up1, _Up2>&& __p) - : __base_(allocator_arg_t(), __a, _VSTD::move(__p)) - { } - - template class _And = _And, __enable_if_t< - _And< - _EnableCtorFromPair&&>, - _Not<_BothImplicitlyConvertible&&> > // explicit check - >::value - , int> = 0> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - explicit tuple(allocator_arg_t, const _Alloc& __a, pair<_Up1, _Up2>&& __p) + explicit(_Not<_BothImplicitlyConvertible&&> >::value) tuple(allocator_arg_t, const _Alloc& __a, pair<_Up1, _Up2>&& __p) : __base_(allocator_arg_t(), __a, _VSTD::move(__p)) { } @@ -1111,6 +920,8 @@ public: : 
__base_(allocator_arg_t(), __alloc, std::move(__p)) {} #endif // _LIBCPP_STD_VER >= 23 +_LIBCPP_DIAGNOSTIC_POP + // [tuple.assign] _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& operator=(_If<_And...>::value, tuple, __nat> const& __tuple) From eb14f47bf1ccfda500ba3c3092d70e269f6f0b56 Mon Sep 17 00:00:00 2001 From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> Date: Mon, 16 Oct 2023 10:51:28 -0700 Subject: [PATCH 241/720] [mlir][sparse][NFC] fix variable naming convension (#69232) --- .../Transforms/SparseTensorRewriting.cpp | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index f16d08b86a1a1..a1ab2495f5f7b 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -665,29 +665,27 @@ struct TensorReshapeRewriter : public OpRewritePattern { srcDcvs.push_back(srcLcvs[lvl]); } - Value collapsed_size = constantIndex(builder, loc, 1); + Value collapseSize = constantIndex(builder, loc, 1); for (Dimension d = 0; d < srcRank; d++) - collapsed_size = - builder.create(loc, collapsed_size, srcSizes[d]); - SmallVector collapsedSizes = {collapsed_size}; + collapseSize = + builder.create(loc, collapseSize, srcSizes[d]); + SmallVector collapsedSizes = {collapseSize}; - ReassociationIndices collapse_indices; + ReassociationIndices collapseIdx; for (Dimension i = 0; i < srcRank; i++) - collapse_indices.push_back(i); - SmallVector collapse_reassociation = { - collapse_indices}; + collapseIdx.push_back(i); + SmallVector collapseReass = {collapseIdx}; SmallVector collapsedDcvs; - reshapeCvs(builder, loc, collapse_reassociation, srcSizes, srcDcvs, + reshapeCvs(builder, loc, collapseReass, srcSizes, srcDcvs, collapsedSizes, collapsedDcvs); - ReassociationIndices expand_indices; + 
ReassociationIndices expandIdx; for (Dimension i = 0; i < dstTp.getDimRank(); i++) - expand_indices.push_back(i); - SmallVector expand_reassociation = { - expand_indices}; + expandIdx.push_back(i); + SmallVector expandReass = {expandIdx}; SmallVector dstDcvs; - reshapeCvs(builder, loc, expand_reassociation, collapsedSizes, - collapsedDcvs, dstSizes, dstDcvs); + reshapeCvs(builder, loc, expandReass, collapsedSizes, collapsedDcvs, + dstSizes, dstDcvs); auto t = builder.create(loc, v, reduc.front(), dstDcvs); builder.create(loc, t); From 342dca7528116439f3e9c8492a452765d802681a Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Mon, 16 Oct 2023 14:07:16 -0400 Subject: [PATCH 242/720] [clang][dataflow] Check for backedges directly (instead of loop statements). (#68923) Widen on backedge nodes, instead of nodes with a loop statement as terminator. This fixes #67834 and a precision loss from assignment in a loop condition. The commit contains tests for both of these issues. --- .../TypeErasedDataflowAnalysis.cpp | 35 ++++++++----------- .../Analysis/FlowSensitive/TransferTest.cpp | 14 ++++++++ .../TypeErasedDataflowAnalysisTest.cpp | 23 ++++++++++++ 3 files changed, 52 insertions(+), 20 deletions(-) diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 6b167891c1a3a..72d807fc36705 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include -#include #include #include #include @@ -33,8 +32,8 @@ #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/Support/Debug.h" #include 
"llvm/Support/Error.h" @@ -53,19 +52,14 @@ static int blockIndexInPredecessor(const CFGBlock &Pred, return BlockPos - Pred.succ_begin(); } -static bool isLoopHead(const CFGBlock &B) { - if (const auto *T = B.getTerminatorStmt()) - switch (T->getStmtClass()) { - case Stmt::WhileStmtClass: - case Stmt::DoStmtClass: - case Stmt::ForStmtClass: - case Stmt::CXXForRangeStmtClass: - return true; - default: - return false; - } - - return false; +// A "backedge" node is a block introduced in the CFG exclusively to indicate a +// loop backedge. They are exactly identified by the presence of a non-null +// pointer to the entry block of the loop condition. Note that this is not +// necessarily the block with the loop statement as terminator, because +// short-circuit operators will result in multiple blocks encoding the loop +// condition, only one of which will contain the loop statement as terminator. +static bool isBackedgeNode(const CFGBlock &B) { + return B.getLoopTarget() != nullptr; } namespace { @@ -502,14 +496,15 @@ runTypeErasedDataflowAnalysis( PostVisitCFG) { PrettyStackTraceAnalysis CrashInfo(CFCtx, "runTypeErasedDataflowAnalysis"); - PostOrderCFGView POV(&CFCtx.getCFG()); - ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV); + const clang::CFG &CFG = CFCtx.getCFG(); + PostOrderCFGView POV(&CFG); + ForwardDataflowWorklist Worklist(CFG, &POV); std::vector> BlockStates( - CFCtx.getCFG().size()); + CFG.size()); // The entry basic block doesn't contain statements so it can be skipped. 
- const CFGBlock &Entry = CFCtx.getCFG().getEntry(); + const CFGBlock &Entry = CFG.getEntry(); BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), InitEnv.fork()}; Worklist.enqueueSuccessors(&Entry); @@ -553,7 +548,7 @@ runTypeErasedDataflowAnalysis( llvm::errs() << "Old Env:\n"; OldBlockState->Env.dump(); }); - if (isLoopHead(*Block)) { + if (isBackedgeNode(*Block)) { LatticeJoinEffect Effect1 = Analysis.widenTypeErased( NewBlockState.Lattice, OldBlockState->Lattice); LatticeJoinEffect Effect2 = diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 632632a1b30e7..ea36a3f705ee9 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -4099,6 +4099,20 @@ TEST(TransferTest, LoopDereferencingChangingRecordPointerConverges) { ASSERT_THAT_ERROR(checkDataflowWithNoopAnalysis(Code), llvm::Succeeded()); } +TEST(TransferTest, LoopWithShortCircuitedConditionConverges) { + std::string Code = R"cc( + bool foo(); + + void target() { + bool c = false; + while (foo() || foo()) { + c = true; + } + } + )cc"; + ASSERT_THAT_ERROR(checkDataflowWithNoopAnalysis(Code), llvm::Succeeded()); +} + TEST(TransferTest, DoesNotCrashOnUnionThisExpr) { std::string Code = R"( union Union { diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index edd87b798198b..8422f3804db54 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -912,6 +912,29 @@ TEST_F(FlowConditionTest, WhileStmt) { }); } +TEST_F(FlowConditionTest, WhileStmtWithAssignmentInCondition) { + std::string Code = R"( + void target(bool Foo) { + // This test checks whether the analysis preserves the connection between + // the value of `Foo` 
and the assignment expression, despite widening. + // The equality operator generates a fresh boolean variable on each + // interpretation, which forces use of widening. + while ((Foo = (3 == 4))) { + (void)0; + /*[[p]]*/ + } + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + auto &FooVal = getValueForDecl(ASTCtx, Env, "Foo").formula(); + EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + }); +} + TEST_F(FlowConditionTest, Conjunction) { std::string Code = R"( void target(bool Foo, bool Bar) { From 5b07de1a5faf4a22ae6fd982b877c5e7e3a76559 Mon Sep 17 00:00:00 2001 From: 5chmidti <44101708+5chmidti@users.noreply.github.com> Date: Mon, 16 Oct 2023 20:09:46 +0200 Subject: [PATCH 243/720] [clang][ASTMatcher] fix hasAnyBase not binding submatchers (#67939) The BoundNodesTreeBuilder used in the BaseSpecMatcher was the original and was reset to its original state if a match occurred. The matcher now uses the local copy in the inner matcher. Fixes https://github.com/llvm/llvm-project/issues/65421 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/ASTMatchers/ASTMatchersInternal.cpp | 2 +- .../ASTMatchers/ASTMatchersTraversalTest.cpp | 13 +++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9782c123f4c93..58c06edb6deea 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -521,6 +521,8 @@ Bug Fixes to AST Handling computed RecordLayout is incorrect if fields are not completely imported and should not be cached. `Issue 64170 `_ +- Fixed ``hasAnyBase`` not binding nodes in its submatcher. 
+ (`#65421 `_) Miscellaneous Bug Fixes ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp index 40688107215f2..435bbdeda2206 100644 --- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp +++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -87,7 +87,7 @@ bool matchesAnyBase(const CXXRecordDecl &Node, [Finder, Builder, &BaseSpecMatcher](const CXXBaseSpecifier *BaseSpec, CXXBasePath &IgnoredParam) { BoundNodesTreeBuilder Result(*Builder); - if (BaseSpecMatcher.matches(*BaseSpec, Finder, Builder)) { + if (BaseSpecMatcher.matches(*BaseSpec, Finder, &Result)) { *Builder = std::move(Result); return true; } diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp index 89954711804aa..d4a695b974bf0 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp @@ -8,6 +8,7 @@ #include "ASTMatchersTest.h" #include "clang/AST/Attrs.inc" +#include "clang/AST/DeclCXX.h" #include "clang/AST/PrettyPrinter.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" @@ -5457,6 +5458,18 @@ TEST(HasParent, NoDuplicateParents) { stmt().bind("node"), std::make_unique())); } +TEST(HasAnyBase, BindsInnerBoundNodes) { + EXPECT_TRUE(matchAndVerifyResultTrue( + "struct Inner {}; struct Proxy : Inner {}; struct Main : public " + "Proxy {};", + cxxRecordDecl(hasName("Main"), + hasAnyBase(cxxBaseSpecifier(hasType( + cxxRecordDecl(hasName("Inner")).bind("base-class"))))) + .bind("class"), + std::make_unique>("base-class", + "Inner"))); +} + TEST(TypeMatching, PointeeTypes) { EXPECT_TRUE(matches("int b; int &a = b;", referenceType(pointee(builtinType())))); From 1ebe73821f4cefa48d7e3d24e62303412ab9ad25 Mon Sep 17 00:00:00 2001 From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> Date: Mon, 16 Oct 2023 11:35:09 -0700 Subject: [PATCH 
244/720] [mlir][sparse] fix crash due to different std::sort implementation. (#69236) --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 0d95c60a08689..5e57facaf2376 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -531,7 +531,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { continue; return coordinates[l][lhs] < coordinates[l][rhs]; } - assert(false && "duplicate coordinates"); + assert(lhs == rhs && "duplicate coordinates"); return false; }); From 47401b6173459515014a1bafe9a02c0e726fbec0 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Mon, 16 Oct 2023 20:50:15 +0200 Subject: [PATCH 245/720] [lld] Add support for relocations in x86_64 objects on Arm64EC targets. (#69098) Since EC targets may combine various object types, we need to pick relocation format based on chunk type instead of global config. --- lld/COFF/Chunks.cpp | 4 ++-- lld/COFF/Chunks.h | 2 ++ lld/test/COFF/arm64ec-reloc.test | 37 ++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 lld/test/COFF/arm64ec-reloc.test diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index e17b64df869fe..4e845afa8947a 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -437,7 +437,7 @@ void SectionChunk::applyRelocation(uint8_t *off, // Compute the RVA of the relocation for relative relocations. uint64_t p = rva + rel.VirtualAddress; uint64_t imageBase = file->ctx.config.imageBase; - switch (file->ctx.config.machine) { + switch (getMachine()) { case AMD64: applyRelX64(off, rel.Type, os, s, p, imageBase); break; @@ -551,7 +551,7 @@ static uint8_t getBaserelType(const coff_relocation &rel, // Only called when base relocation is enabled. 
void SectionChunk::getBaserels(std::vector *res) { for (const coff_relocation &rel : getRelocs()) { - uint8_t ty = getBaserelType(rel, file->ctx.config.machine); + uint8_t ty = getBaserelType(rel, getMachine()); if (ty == IMAGE_REL_BASED_ABSOLUTE) continue; Symbol *target = file->getSymbol(rel.SymbolTableIndex); diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index 3d605e6ab10c8..d14a258fc81e1 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -219,6 +219,8 @@ class SectionChunk final : public Chunk { ArrayRef getContents() const; void writeTo(uint8_t *buf) const; + MachineTypes getMachine() const { return file->getMachineType(); } + // Defend against unsorted relocations. This may be overly conservative. void sortRelocations(); diff --git a/lld/test/COFF/arm64ec-reloc.test b/lld/test/COFF/arm64ec-reloc.test new file mode 100644 index 0000000000000..3060891bfe02e --- /dev/null +++ b/lld/test/COFF/arm64ec-reloc.test @@ -0,0 +1,37 @@ +REQUIRES: aarch64, x86 +RUN: split-file %s %t.dir && cd %t.dir + +Link a mix of ARM64EC and x86_64 data and check that relocations work. 
+ +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows arm64ec-data-sym.s -o arm64ec-data-sym.obj +RUN: llvm-mc -filetype=obj -triple=x86_64-windows x86_64-data-sym.s -o x86_64-data-sym.obj +RUN: lld-link -out:test.dll -machine:arm64ec arm64ec-data-sym.obj x86_64-data-sym.obj -dll -noentry + +RUN: llvm-readobj --hex-dump=.data test.dll | FileCheck -check-prefix=ARM64EC-DATA %s +ARM64EC-DATA: 0x180001000 00100080 01000000 08100080 01000000 + +RUN: llvm-readobj --coff-basereloc test.dll | FileCheck -check-prefix=RELOCS %s +RELOCS: BaseReloc [ +RELOCS-NEXT: Entry { +RELOCS-NEXT: Type: DIR64 +RELOCS-NEXT: Address: 0x1000 +RELOCS-NEXT: } +RELOCS-NEXT: Entry { +RELOCS-NEXT: Type: DIR64 +RELOCS-NEXT: Address: 0x1008 +RELOCS-NEXT: } +RELOCS-NEXT: ] + +#--- arm64ec-data-sym.s + .data + .globl arm64ec_data_sym + .p2align 2, 0x0 +arm64ec_data_sym: + .xword arm64ec_data_sym + +#--- x86_64-data-sym.s + .data + .globl x86_64_data_sym + .p2align 2, 0x0 +x86_64_data_sym: + .quad x86_64_data_sym From 38f8b7cbe472921fd8e9c15ca6ab430c1e9be0c3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 16 Oct 2023 19:53:43 +0100 Subject: [PATCH 246/720] [LV] Replace value numbers with patterns in tests (NFC). Replace some hardcoded value numbers in CHECK-LINES to use patterns, to make the tests more robust wrt renumbering. 
--- .../RISCV/riscv-vector-reverse.ll | 28 +++++++++---------- .../LoopVectorize/vplan-printing.ll | 14 +++++----- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index d2534d7d18ea7..cad64f5c7e2be 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -51,7 +51,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { -; CHECK-NEXT: Live-in vp<%0> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<%1> = original trip-count ; CHECK: ph: ; CHECK-NEXT: EMIT vp<%1> = EXPAND SCEV (zext i32 %n to i64) @@ -60,18 +60,18 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Successor(s): vector loop ; CHECK: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%n> + vp<%2> * ir<-1> -; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%4>, ir<-1> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> ; CHECK-NEXT: WIDEN ir<%1> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr 
inbounds ir<%A>, ir<%idxprom> ; CHECK-NEXT: WIDEN store ir<%arrayidx3>, ir<%add9> -; CHECK-NEXT: EMIT vp<%11> = VF * UF + nuw vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%11>, vp<%0> +; CHECK-NEXT: EMIT vp<[[IV_INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[IV_INC]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -188,7 +188,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { -; CHECK-NEXT: Live-in vp<%0> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<%1> = original trip-count ; CHECK: ph: ; CHECK-NEXT: EMIT vp<%1> = EXPAND SCEV (zext i32 %n to i64) @@ -197,18 +197,18 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Successor(s): vector loop ; CHECK: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%n> + vp<%2> * ir<-1> -; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%4>, ir<-1> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> +; CHECK-NEXT: vp<[[STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> ; CHECK-NEXT: WIDEN ir<%1> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> ; CHECK-NEXT: WIDEN store ir<%arrayidx3>, 
ir<%conv1> -; CHECK-NEXT: EMIT vp<%11> = VF * UF + nuw vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%11>, vp<%0> +; CHECK-NEXT: EMIT vp<[[IV_INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[IV_INC]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index bbede3f79d749..ced2dc1655fb7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -691,7 +691,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> -; CHECK-NEXT: CLONE ir<%ld.addr> = getelementptr inbounds ir<%src>, vp<%2> +; CHECK-NEXT: CLONE ir<%ld.addr> = getelementptr inbounds ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%ld.value> = load ir<%ld.addr> ; CHECK-NEXT: WIDEN ir<%ifcond> = fcmp oeq ir<%ld.value>, ir<5.000000e+00> ; CHECK-NEXT: Successor(s): pred.call @@ -707,17 +707,17 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: Successor(s): pred.call.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.call.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%foo.ret.1> -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%foo.ret.2> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PHI1:%.+]]> = ir<%foo.ret.1> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PHI2:%.+]]> = ir<%foo.ret.2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): if.then.1 ; CHECK-EMPTY: ; CHECK-NEXT: if.then.1: -; CHECK-NEXT: WIDEN ir<%fadd> = fadd vp<%8>, vp<%9> -; CHECK-NEXT: EMIT vp<%11> = not ir<%ifcond> -; CHECK-NEXT: BLEND ir<%st.value> = ir<%ld.value>/vp<%11> ir<%fadd>/ir<%ifcond> -; CHECK-NEXT: CLONE 
ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<%2> +; CHECK-NEXT: WIDEN ir<%fadd> = fadd vp<[[PHI1]]>, vp<[[PHI2]]> +; CHECK-NEXT: EMIT vp<[[NOT_COND:%.+]]> = not ir<%ifcond> +; CHECK-NEXT: BLEND ir<%st.value> = ir<%ld.value>/vp<[[NOT_COND]]> ir<%fadd>/ir<%ifcond> +; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%st.addr>, ir<%st.value> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> From f6f944e77f741861e641e1dd46c30dcbaf8c83b7 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Mon, 16 Oct 2023 21:00:13 +0200 Subject: [PATCH 247/720] [lld][NFC] Factor out isCodeSection helper. (#69193) --- lld/COFF/Writer.cpp | 6 +----- lld/COFF/Writer.h | 6 ++++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 4f6c2a57f5335..d4f6ee6fde495 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1403,11 +1403,7 @@ void Writer::assignAddresses() { // If /FUNCTIONPADMIN is used, functions are padded in order to create a // hotpatchable image. - const bool isCodeSection = - (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) && - (sec->header.Characteristics & IMAGE_SCN_MEM_READ) && - (sec->header.Characteristics & IMAGE_SCN_MEM_EXECUTE); - uint32_t padding = isCodeSection ? config->functionPadMin : 0; + uint32_t padding = sec->isCodeSection() ? config->functionPadMin : 0; for (Chunk *c : sec->chunks) { if (padding && c->isHotPatchable()) diff --git a/lld/COFF/Writer.h b/lld/COFF/Writer.h index 4a74aa7ada59d..9004bb310d073 100644 --- a/lld/COFF/Writer.h +++ b/lld/COFF/Writer.h @@ -64,6 +64,12 @@ class OutputSection { // Used only when the name is longer than 8 bytes. 
void setStringTableOff(uint32_t v) { stringTableOff = v; } + bool isCodeSection() const { + return (header.Characteristics & llvm::COFF::IMAGE_SCN_CNT_CODE) && + (header.Characteristics & llvm::COFF::IMAGE_SCN_MEM_READ) && + (header.Characteristics & llvm::COFF::IMAGE_SCN_MEM_EXECUTE); + } + // N.B. The section index is one based. uint32_t sectionIndex = 0; From dd0fba11690f9fef304d5f48cde646e5eca8d3c0 Mon Sep 17 00:00:00 2001 From: antangelo Date: Mon, 16 Oct 2023 15:17:36 -0400 Subject: [PATCH 248/720] [clang][Sema] Use original template pattern when declaring implicit deduction guides for nested template classes (#68379) When a nested template is instantiated, the template pattern of the inner class is not copied into the outer class ClassTemplateSpecializationDecl. The specialization contains a ClassTemplateDecl with an empty record that points to the original template pattern instead. As a result, when looking up the constructors of the inner class, no results are returned. This patch finds the original template pattern and uses that for the lookup instead. Based on CWG2471 we must also substitute the known outer template arguments when creating deduction guides for the inner class. Fixes #46200 Fixes #57812 --- clang/docs/ReleaseNotes.rst | 5 +++++ clang/lib/Sema/SemaTemplate.cpp | 22 ++++++++++++++++++- .../nested-implicit-deduction-guides.cpp | 12 ++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 58c06edb6deea..ff66d2c272098 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -513,6 +513,11 @@ Bug Fixes to C++ Support rather than prefer the non-templated constructor as specified in [standard.group]p3. +- Fix a bug where implicit deduction guides are not correctly generated for nested template + classes. 
Fixes: + (`#46200 `_) + (`#57812 `_) + Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ - Fixed an import failure of recursive friend class template. diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index ff370dd1e080b..fba5b22139170 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2250,6 +2250,7 @@ struct ConvertConstructorToDeductionGuideTransform { Sema &SemaRef; ClassTemplateDecl *Template; + ClassTemplateDecl *NestedPattern = nullptr; DeclContext *DC = Template->getDeclContext(); CXXRecordDecl *Primary = Template->getTemplatedDecl(); @@ -2327,6 +2328,8 @@ struct ConvertConstructorToDeductionGuideTransform { if (FTD) { Args.addOuterTemplateArguments(SubstArgs); Args.addOuterRetainedLevel(); + if (NestedPattern) + Args.addOuterRetainedLevels(NestedPattern->getTemplateDepth()); } FunctionProtoTypeLoc FPTL = CD->getTypeSourceInfo()->getTypeLoc() @@ -2438,10 +2441,17 @@ struct ConvertConstructorToDeductionGuideTransform { SmallVector ParamTypes; const FunctionProtoType *T = TL.getTypePtr(); + MultiLevelTemplateArgumentList OuterInstantiationArgs; + if (NestedPattern) + OuterInstantiationArgs = SemaRef.getTemplateInstantiationArgs(Template); + // -- The types of the function parameters are those of the constructor. for (auto *OldParam : TL.getParams()) { ParmVarDecl *NewParam = transformFunctionTypeParam(OldParam, Args, MaterializedTypedefs); + if (NestedPattern && NewParam) + NewParam = transformFunctionTypeParam(NewParam, OuterInstantiationArgs, + MaterializedTypedefs); if (!NewParam) return QualType(); ParamTypes.push_back(NewParam->getType()); @@ -2647,13 +2657,23 @@ void Sema::DeclareImplicitDeductionGuides(TemplateDecl *Template, if (BuildingDeductionGuides.isInvalid()) return; + // If the template is nested, then we need to use the original + // pattern to iterate over the constructors. 
+ ClassTemplateDecl *Pattern = Transform.Template; + while (Pattern->getInstantiatedFromMemberTemplate()) { + if (Pattern->isMemberSpecialization()) + break; + Pattern = Pattern->getInstantiatedFromMemberTemplate(); + Transform.NestedPattern = Pattern; + } + // Convert declared constructors into deduction guide templates. // FIXME: Skip constructors for which deduction must necessarily fail (those // for which some class template parameter without a default argument never // appears in a deduced context). llvm::SmallPtrSet ProcessedCtors; bool AddedAny = false; - for (NamedDecl *D : LookupConstructors(Transform.Primary)) { + for (NamedDecl *D : LookupConstructors(Pattern->getTemplatedDecl())) { D = D->getUnderlyingDecl(); if (D->isInvalidDecl() || D->isImplicit()) continue; diff --git a/clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp b/clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp new file mode 100644 index 0000000000000..4915c687cf4c4 --- /dev/null +++ b/clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++17 -verify %s +// expected-no-diagnostics + +template struct S { + template struct N { + N(T) {} + N(T, U) {} + template N(V, U) {} + }; +}; + +S::N x{"a", 1}; From f7a8a78cb7c1a3c5cc9e6cd999e908f2725a4664 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 16 Oct 2023 20:28:22 +0100 Subject: [PATCH 249/720] [VPlan] Also print operands of canonical IV (NFC). Also print the operands of VPCanonicalIVPHIRecipe. That was missed earlier. 
--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 3 +- .../LoopVectorize/vplan-dot-printing.ll | 2 +- .../LoopVectorize/vplan-printing.ll | 54 +++++++++---------- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 2a1213a980959..efc95c1cd08c6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1418,7 +1418,8 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "EMIT "; printAsOperand(O, SlotTracker); - O << " = CANONICAL-INDUCTION"; + O << " = CANONICAL-INDUCTION "; + printOperands(O, SlotTracker); } #endif diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll index db2ca36352f5a..6f54d37383895 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -26,7 +26,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: label="\ vector loop" ; CHECK-NEXT: N2 [label = ; CHECK-NEXT: "vector.body:\l" + -; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION\l" + +; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION ir\<0\>, vp\<%7\>\l" + ; CHECK-NEXT: " vp\<[[STEPS:%.+]]\> = SCALAR-STEPS vp\<[[CAN_IV]]\>, ir\<1\>\l" + ; CHECK-NEXT: " CLONE ir\<%arrayidx\> = getelementptr inbounds ir\<%y\>, vp\<[[STEPS]]\>\l" + ; CHECK-NEXT: " WIDEN ir\<%lv\> = load ir\<%arrayidx\>\l" + diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index ced2dc1655fb7..3626b8c9dce78 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -17,14 +17,14 @@ define void @print_call_and_memory(i64 %n, 
ptr noalias %y, ptr noalias %x) nounw ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @llvm.sqrt.f32(ir<%lv>) ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%arrayidx2>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -64,7 +64,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr inbounds ir<%y>, ir<%iv> @@ -74,7 +74,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-NEXT: WIDEN ir<%add> = fadd ir<%lv>, ir<%sel> ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%arrayidx2>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; 
CHECK-NEXT: } @@ -116,13 +116,13 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -162,13 +162,13 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> ; CHECK-NEXT: vp<[[IV:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -211,7 +211,7 @@ define void 
@print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-INDUCTION %i = phi 0, %i.next, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: WIDEN ir<%cmp> = icmp ult ir<%i>, ir<5> @@ -237,7 +237,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: BLEND ir<%d> = ir<0>/vp<[[NOT]]> vp<[[PRED]]>/ir<%cmp> ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%idx>, ir<%d> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -285,7 +285,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<4> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<4> ; CHECK-NEXT: CLONE ir<%gep.AB.0> = getelementptr inbounds ir<@AB>, ir<0>, vp<[[STEPS]]> @@ -301,7 +301,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-NEXT: store ir<1> to index 1 ; CHECK-NEXT: store ir<2> to index 2 ; CHECK-NEXT: store ir<%AB.3> to index 3 -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -353,7 +353,7 @@ define 
float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi ir<0.000000e+00>, ir<%muladd> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> @@ -362,7 +362,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: WIDEN ir<%l.b> = load ir<%arrayidx2> ; CHECK-NEXT: EMIT vp<[[FMUL:%.+]]> = fmul nnan ninf nsz ir<%l.a>, ir<%l.b> ; CHECK-NEXT: REDUCE ir<[[MULADD:%.+]]> = ir<%sum.07> + nnan ninf nsz reduce.fadd (vp<[[FMUL]]>) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -404,7 +404,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%isd> = getelementptr inbounds ir<%asd>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%lsd> = load ir<%isd> @@ -436,7 +436,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: EMIT vp<[[SEL2:%.+]]> = select vp<[[NOT1]]>, vp<[[NOT2]]>, ir ; CHECK-NEXT: BLEND ir<%ysd.0> = vp<[[PHI]]>/vp<[[OR1]]> ir<%psd>/vp<[[SEL2]]> ; CHECK-NEXT: WIDEN store ir<%isd>, ir<%ysd.0> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT 
vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT:} @@ -495,7 +495,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-INDUCTION\l" + ; CHECK-NEXT: " %iv = phi %iv.next, 0\l" + ; CHECK-NEXT: " ir<%v2>, vp<[[EXP_SCEV]]> @@ -504,8 +504,8 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: WIDEN ir<%v3> = add nuw ir<%v2>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%v3>, ir<%gep> -; CHECK-NEXT: EMIT vp<[[CAN_INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_INC]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -545,13 +545,13 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%iv>, ir<%off> ; CHECK-NEXT: WIDEN store ir<%gep>, ir<0> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, 
vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -591,7 +591,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.y> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%gep.y> @@ -600,7 +600,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-NEXT: WIDEN ir<%div> = fdiv reassoc nsz contract ir<%mul>, ir<2.000000e+00> ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%gep.x>, ir<%div> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -641,7 +641,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%gep.x> @@ -649,7 +649,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: WIDEN ir<%div.2> = udiv ir<%lv>, ir<60> ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%div.1>, ir<%div.2> ; CHECK-NEXT: WIDEN store ir<%gep.x>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT 
vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -689,7 +689,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%ld.addr> = getelementptr inbounds ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%ld.value> = load ir<%ld.addr> @@ -719,7 +719,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: BLEND ir<%st.value> = ir<%ld.value>/vp<[[NOT_COND]]> ir<%fadd>/ir<%ifcond> ; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%st.addr>, ir<%st.value> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } From 5db4779c3f07b6f562339722c176fb58329652ac Mon Sep 17 00:00:00 2001 From: Pete Steinfeld <47540744+psteinfeld@users.noreply.github.com> Date: Mon, 16 Oct 2023 12:37:57 -0700 Subject: [PATCH 250/720] [flang] Regularize TODO messages for coarray related features (#69227) I want to make "not yet implemented" messages for features related to coarrays easy to identify and make them easy for users to read. 
--- flang/lib/Lower/Allocatable.cpp | 4 ++-- flang/lib/Lower/Bridge.cpp | 18 +++++++++--------- flang/lib/Lower/CallInterface.cpp | 2 +- flang/lib/Lower/Coarray.cpp | 8 ++++---- flang/lib/Lower/ConvertExpr.cpp | 4 ++-- flang/lib/Lower/ConvertExprToHLFIR.cpp | 4 ++-- flang/lib/Lower/Runtime.cpp | 16 ++++++++-------- flang/lib/Lower/VectorSubscripts.cpp | 3 ++- 8 files changed, 30 insertions(+), 29 deletions(-) diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index a05f06aead173..898f34786a248 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -462,7 +462,7 @@ class AllocateStmtHelper { errorManager.genStatCheck(builder, loc); genAllocateObjectInit(box); if (alloc.hasCoarraySpec()) - TODO(loc, "coarray allocation"); + TODO(loc, "coarray: allocation of a coarray object"); if (alloc.type.IsPolymorphic()) genSetType(alloc, box, loc); genSetDeferredLengthParameters(alloc, box); @@ -582,7 +582,7 @@ class AllocateStmtHelper { errorManager.genStatCheck(builder, loc); genAllocateObjectInit(box); if (alloc.hasCoarraySpec()) - TODO(loc, "coarray allocation"); + TODO(loc, "coarray: allocation of a coarray object"); // Set length of the allocate object if it has. Otherwise, get the length // from source for the deferred length parameter. 
if (lenParams.empty() && box.isCharacter() && diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 5ac4d822faaae..ef8540c35a372 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2625,35 +2625,35 @@ class FirConverter : public Fortran::lower::AbstractConverter { } void genFIR(const Fortran::parser::ChangeTeamConstruct &construct) { - TODO(toLocation(), "ChangeTeamConstruct implementation"); + TODO(toLocation(), "coarray: ChangeTeamConstruct"); } void genFIR(const Fortran::parser::ChangeTeamStmt &stmt) { - TODO(toLocation(), "ChangeTeamStmt implementation"); + TODO(toLocation(), "coarray: ChangeTeamStmt"); } void genFIR(const Fortran::parser::EndChangeTeamStmt &stmt) { - TODO(toLocation(), "EndChangeTeamStmt implementation"); + TODO(toLocation(), "coarray: EndChangeTeamStmt"); } void genFIR(const Fortran::parser::CriticalConstruct &criticalConstruct) { setCurrentPositionAt(criticalConstruct); - TODO(toLocation(), "CriticalConstruct implementation"); + TODO(toLocation(), "coarray: CriticalConstruct"); } void genFIR(const Fortran::parser::CriticalStmt &) { - TODO(toLocation(), "CriticalStmt implementation"); + TODO(toLocation(), "coarray: CriticalStmt"); } void genFIR(const Fortran::parser::EndCriticalStmt &) { - TODO(toLocation(), "EndCriticalStmt implementation"); + TODO(toLocation(), "coarray: EndCriticalStmt"); } void genFIR(const Fortran::parser::SelectRankConstruct &selectRankConstruct) { setCurrentPositionAt(selectRankConstruct); - TODO(toLocation(), "SelectRankConstruct implementation"); + TODO(toLocation(), "coarray: SelectRankConstruct"); } void genFIR(const Fortran::parser::SelectRankStmt &) { - TODO(toLocation(), "SelectRankStmt implementation"); + TODO(toLocation(), "coarray: SelectRankStmt"); } void genFIR(const Fortran::parser::SelectRankCaseStmt &) { - TODO(toLocation(), "SelectRankCaseStmt implementation"); + TODO(toLocation(), "coarray: SelectRankCaseStmt"); } void genFIR(const 
Fortran::parser::SelectTypeConstruct &selectTypeConstruct) { diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index 5299347e561ec..ea38b737a303a 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -952,7 +952,7 @@ class Fortran::lower::CallInterfaceImpl { if (shapeAttrs.test(ShapeAttr::AssumedRank)) TODO(loc, "assumed rank in procedure interface"); if (shapeAttrs.test(ShapeAttr::Coarray)) - TODO(loc, "coarray in procedure interface"); + TODO(loc, "coarray: dummy argument coarray in procedure interface"); // So far assume that if the argument cannot be passed by implicit interface // it must be by box. That may no be always true (e.g for simple optionals) diff --git a/flang/lib/Lower/Coarray.cpp b/flang/lib/Lower/Coarray.cpp index b5ab7b51fb00a..a84f65a5c49e8 100644 --- a/flang/lib/Lower/Coarray.cpp +++ b/flang/lib/Lower/Coarray.cpp @@ -27,27 +27,27 @@ void Fortran::lower::genChangeTeamConstruct( Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &, const Fortran::parser::ChangeTeamConstruct &) { - TODO(converter.getCurrentLocation(), "CHANGE TEAM construct"); + TODO(converter.getCurrentLocation(), "coarray: CHANGE TEAM construct"); } void Fortran::lower::genChangeTeamStmt( Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &, const Fortran::parser::ChangeTeamStmt &) { - TODO(converter.getCurrentLocation(), "CHANGE TEAM stmt"); + TODO(converter.getCurrentLocation(), "coarray: CHANGE TEAM statement"); } void Fortran::lower::genEndChangeTeamStmt( Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &, const Fortran::parser::EndChangeTeamStmt &) { - TODO(converter.getCurrentLocation(), "END CHANGE TEAM"); + TODO(converter.getCurrentLocation(), "coarray: END CHANGE TEAM statement"); } void Fortran::lower::genFormTeamStatement( Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &, const 
Fortran::parser::FormTeamStmt &) { - TODO(converter.getCurrentLocation(), "FORM TEAM"); + TODO(converter.getCurrentLocation(), "coarray: FORM TEAM statement"); } //===----------------------------------------------------------------------===// diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp index 8788e82b59a8d..6d2ac62b61b74 100644 --- a/flang/lib/Lower/ConvertExpr.cpp +++ b/flang/lib/Lower/ConvertExpr.cpp @@ -3814,7 +3814,7 @@ class ArrayExprLowering { return false; } bool genShapeFromDataRef(const Fortran::evaluate::CoarrayRef &) { - TODO(getLoc(), "coarray ref"); + TODO(getLoc(), "coarray: reference to a coarray in an expression"); return false; } bool genShapeFromDataRef(const Fortran::evaluate::Component &x) { @@ -7091,7 +7091,7 @@ class ArrayExprLowering { } CC genarr(const Fortran::evaluate::CoarrayRef &x, ComponentPath &components) { - TODO(getLoc(), "coarray reference"); + TODO(getLoc(), "coarray: reference to a coarray in an expression"); } CC genarr(const Fortran::evaluate::NamedEntity &x, diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp index 44c9eb1e9123b..236a3639d8dc2 100644 --- a/flang/lib/Lower/ConvertExprToHLFIR.cpp +++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp @@ -371,11 +371,11 @@ class HlfirDesignatorBuilder { fir::FortranVariableOpInterface gen(const Fortran::evaluate::CoarrayRef &coarrayRef) { - TODO(getLoc(), "lowering CoarrayRef to HLFIR"); + TODO(getLoc(), "coarray: lowering a reference to a coarray object"); } mlir::Type visit(const Fortran::evaluate::CoarrayRef &, PartInfo &) { - TODO(getLoc(), "lowering CoarrayRef to HLFIR"); + TODO(getLoc(), "coarray: lowering a reference to a coarray object"); } fir::FortranVariableOpInterface diff --git a/flang/lib/Lower/Runtime.cpp b/flang/lib/Lower/Runtime.cpp index 2cf1e522d330d..8855cab8b5174 100644 --- a/flang/lib/Lower/Runtime.cpp +++ b/flang/lib/Lower/Runtime.cpp @@ -140,49 +140,49 @@ void 
Fortran::lower::genFailImageStatement( void Fortran::lower::genEventPostStatement( Fortran::lower::AbstractConverter &converter, const Fortran::parser::EventPostStmt &) { - TODO(converter.getCurrentLocation(), "EVENT POST runtime"); + TODO(converter.getCurrentLocation(), "coarray: EVENT POST runtime"); } void Fortran::lower::genEventWaitStatement( Fortran::lower::AbstractConverter &converter, const Fortran::parser::EventWaitStmt &) { - TODO(converter.getCurrentLocation(), "EVENT WAIT runtime"); + TODO(converter.getCurrentLocation(), "coarray: EVENT WAIT runtime"); } void Fortran::lower::genLockStatement( Fortran::lower::AbstractConverter &converter, const Fortran::parser::LockStmt &) { - TODO(converter.getCurrentLocation(), "LOCK runtime"); + TODO(converter.getCurrentLocation(), "coarray: LOCK runtime"); } void Fortran::lower::genUnlockStatement( Fortran::lower::AbstractConverter &converter, const Fortran::parser::UnlockStmt &) { - TODO(converter.getCurrentLocation(), "UNLOCK runtime"); + TODO(converter.getCurrentLocation(), "coarray: UNLOCK runtime"); } void Fortran::lower::genSyncAllStatement( Fortran::lower::AbstractConverter &converter, const Fortran::parser::SyncAllStmt &) { - TODO(converter.getCurrentLocation(), "SYNC ALL runtime"); + TODO(converter.getCurrentLocation(), "coarray: SYNC ALL runtime"); } void Fortran::lower::genSyncImagesStatement( Fortran::lower::AbstractConverter &converter, const Fortran::parser::SyncImagesStmt &) { - TODO(converter.getCurrentLocation(), "SYNC IMAGES runtime"); + TODO(converter.getCurrentLocation(), "coarray: SYNC IMAGES runtime"); } void Fortran::lower::genSyncMemoryStatement( Fortran::lower::AbstractConverter &converter, const Fortran::parser::SyncMemoryStmt &) { - TODO(converter.getCurrentLocation(), "SYNC MEMORY runtime"); + TODO(converter.getCurrentLocation(), "coarray: SYNC MEMORY runtime"); } void Fortran::lower::genSyncTeamStatement( Fortran::lower::AbstractConverter &converter, const Fortran::parser::SyncTeamStmt &) 
{ - TODO(converter.getCurrentLocation(), "SYNC TEAM runtime"); + TODO(converter.getCurrentLocation(), "coarray: SYNC TEAM runtime"); } void Fortran::lower::genPauseStatement( diff --git a/flang/lib/Lower/VectorSubscripts.cpp b/flang/lib/Lower/VectorSubscripts.cpp index ca5dfc836e5dc..7439b9f7df8fd 100644 --- a/flang/lib/Lower/VectorSubscripts.cpp +++ b/flang/lib/Lower/VectorSubscripts.cpp @@ -212,7 +212,8 @@ class VectorSubscriptBoxBuilder { mlir::Type gen(const Fortran::evaluate::CoarrayRef &) { // Is this possible/legal ? - TODO(loc, "coarray ref with vector subscript in IO input"); + TODO(loc, "coarray: reference to coarray object with vector subscript in " + "IO input"); } template From 4718b4011f1d3038c73e2594e4651243f4a221e5 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 17 Oct 2023 03:49:39 +0800 Subject: [PATCH 251/720] [LV] Invalidate disposition of SCEV values after loop vectorization (#69230) This PR fixes the assertion failure of `SE.verify()` after loop vectorization. --- .../Transforms/Vectorize/LoopVectorize.cpp | 13 ++++++----- .../LoopVectorize/scev-invalidation.ll | 22 +++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/scev-invalidation.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2ca7e75f97f0f..aa435b0d47aa5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3543,6 +3543,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); + PSE.getSE()->forgetBlockAndLoopDispositions(); // After vectorization, the exit blocks of the original loop will have // additional predecessors. 
Invalidate SCEVs for the exit phis in case SE @@ -10339,8 +10340,14 @@ LoopVectorizeResult LoopVectorizePass::runImpl( Changed |= CFGChanged |= processLoop(L); - if (Changed) + if (Changed) { LAIs->clear(); + +#ifndef NDEBUG + if (VerifySCEV) + SE->verify(); +#endif + } } // Process each loop nest in the function. @@ -10388,10 +10395,6 @@ PreservedAnalyses LoopVectorizePass::run(Function &F, PA.preserve(); PA.preserve(); PA.preserve(); - -#ifdef EXPENSIVE_CHECKS - SE.verify(); -#endif } if (Result.MadeCFGChange) { diff --git a/llvm/test/Transforms/LoopVectorize/scev-invalidation.ll b/llvm/test/Transforms/LoopVectorize/scev-invalidation.ll new file mode 100644 index 0000000000000..08163293c14e8 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/scev-invalidation.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -passes="require,print,loop-vectorize" --verify-scev -force-vector-interleave=2 -force-vector-width=8 -S | FileCheck %s + +; CHECK-LABEL: @main( +; CHECK: vector.body +define i32 @main(i32 %.pre) { +entry: + br label %for.body + +for.body: + %g.019 = phi i16 [ 0, %entry ], [ %dec7, %for.body ] + %and = and i32 %.pre, 40 + %0 = sub i32 0, %and + %dec7 = add i16 %g.019, 1 + %cmp.not = icmp eq i16 %dec7, 0 + br i1 %cmp.not, label %for.inc16, label %for.body + +for.inc16: + %1 = phi i32 [ %inc, %for.inc16 ], [ 0, %for.body ] + %inc = add i32 %1, 1 + %add12 = add i32 %0, %1 + br label %for.inc16 +} From e2e32f091a903a57c9fd8778c88488d32330ca6e Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 16 Oct 2023 12:50:24 -0700 Subject: [PATCH 252/720] [gn build] Add rules for crtbegin/end (#66012) --- .../gn/secondary/compiler-rt/lib/BUILD.gn | 5 ++- .../compiler-rt/lib/builtins/BUILD.gn | 32 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn index d8c75a01c6945..398b95a06b805 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn +++ 
b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn @@ -5,7 +5,10 @@ group("lib") { "//compiler-rt/lib/cfi:ignorelist($host_toolchain)", ] if (current_os == "linux") { - deps += [ "//compiler-rt/lib/msan" ] + deps += [ + "//compiler-rt/lib/builtins:crt", + "//compiler-rt/lib/msan", + ] } if (current_os == "linux" || current_os == "android") { deps += [ "//compiler-rt/lib/ubsan_minimal" ] diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 303a6c29d7b91..a45795d194c61 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -574,6 +574,38 @@ static_library("builtins") { deps = lse_targets } +if (current_os == "linux") { + source_set("crt_src") { + sources = [ + "crtbegin.c", + "crtend.c", + ] + cflags = [ + "-std=c11", + "-fPIC", + ] + } + copy("crtbegin") { + # TODO: use get_target_outputs if it ever works with source_set to avoid hardcoding crt_src.crtbegin.o + input_dir = get_label_info(":crt_src", "target_out_dir") + sources = [ "$input_dir/crt_src.crtbegin.o" ] + outputs = [ "$crt_current_out_dir/clang_rt.crtbegin.o" ] + deps = [ ":crt_src" ] + } + copy("crtend") { + input_dir = get_label_info(":crt_src", "target_out_dir") + sources = [ "$input_dir/crt_src.crtend.o" ] + outputs = [ "$crt_current_out_dir/clang_rt.crtend.o" ] + deps = [ ":crt_src" ] + } + group("crt") { + deps = [ + ":crtbegin", + ":crtend", + ] + } +} + # Currently unused but necessary to make sync_source_lists_from_cmake.py happy. 
source_set("_unused") { sources = [ From f74b85c67827bd6b2115b5d0ccbf7f7de8cf5731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 16 Oct 2023 12:50:39 -0700 Subject: [PATCH 253/720] [flang][openacc] Support array with dynamic extents in reduction recipe (#68829) Add support for array with dynamic extents in lowering of the reduction recipe. --- flang/lib/Lower/OpenACC.cpp | 99 ++++++++++++++++++++-- flang/test/Lower/OpenACC/acc-reduction.f90 | 64 +++++++++++--- 2 files changed, 143 insertions(+), 20 deletions(-) diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index e09266121cdb9..49db55047ff02 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -920,18 +920,27 @@ static mlir::Value genReductionInitRegion(fir::FirOpBuilder &builder, declareOp.getBase()); return declareOp.getBase(); } else if (auto seqTy = mlir::dyn_cast_or_null(ty)) { - if (seqTy.hasDynamicExtents()) - TODO(loc, "reduction recipe of array with dynamic extents"); if (fir::isa_trivial(seqTy.getEleTy())) { - mlir::Value alloca = builder.create(loc, seqTy); - auto shapeOp = genShapeOp(builder, seqTy, loc); + mlir::Value shape; + auto extents = builder.getBlock()->getArguments().drop_front(1); + if (seqTy.hasDynamicExtents()) + shape = builder.create(loc, extents); + else + shape = genShapeOp(builder, seqTy, loc); + mlir::Value alloca = builder.create( + loc, seqTy, /*typeparams=*/mlir::ValueRange{}, extents); auto declareOp = builder.create( - loc, alloca, accReductionInitName, shapeOp, + loc, alloca, accReductionInitName, shape, llvm::ArrayRef{}, fir::FortranVariableFlagsAttr{}); mlir::Type idxTy = builder.getIndexType(); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); llvm::SmallVector loops; llvm::SmallVector ivs; + + if (seqTy.hasDynamicExtents()) { + builder.create(loc, initValue, 
declareOp.getBase()); + return declareOp.getBase(); + } for (auto ext : llvm::reverse(seqTy.getShape())) { auto lb = builder.createIntegerConstant(loc, idxTy, 0); auto ub = builder.createIntegerConstant(loc, idxTy, ext - 1); @@ -1052,6 +1061,18 @@ static mlir::Value genScalarCombiner(fir::FirOpBuilder &builder, TODO(loc, "reduction operator"); } +static hlfir::DesignateOp::Subscripts +getTripletsFromArgs(mlir::acc::ReductionRecipeOp recipe) { + hlfir::DesignateOp::Subscripts triplets; + for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size(); + i += 3) + triplets.emplace_back(hlfir::DesignateOp::Triplet{ + recipe.getCombinerRegion().getArgument(i), + recipe.getCombinerRegion().getArgument(i + 1), + recipe.getCombinerRegion().getArgument(i + 2)}); + return triplets; +} + static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, mlir::acc::ReductionOperator op, mlir::Type ty, mlir::Value value1, mlir::Value value2, @@ -1061,11 +1082,60 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, ty = fir::unwrapRefType(ty); if (auto seqTy = mlir::dyn_cast(ty)) { - assert(!seqTy.hasDynamicExtents() && - "Assumed shaped array should be boxed for reduction"); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); llvm::SmallVector loops; llvm::SmallVector ivs; + if (seqTy.hasDynamicExtents()) { + auto shape = + genShapeFromBoundsOrArgs(loc, builder, seqTy, bounds, + recipe.getCombinerRegion().getArguments()); + auto v1DeclareOp = builder.create( + loc, value1, llvm::StringRef{}, shape, llvm::ArrayRef{}, + fir::FortranVariableFlagsAttr{}); + auto v2DeclareOp = builder.create( + loc, value2, llvm::StringRef{}, shape, llvm::ArrayRef{}, + fir::FortranVariableFlagsAttr{}); + hlfir::DesignateOp::Subscripts triplets = getTripletsFromArgs(recipe); + + llvm::SmallVector lenParamsLeft; + auto leftEntity = hlfir::Entity{v1DeclareOp.getBase()}; + hlfir::genLengthParameters(loc, builder, leftEntity, lenParamsLeft); + auto 
leftDesignate = builder.create( + loc, v1DeclareOp.getBase().getType(), v1DeclareOp.getBase(), + /*component=*/"", + /*componentShape=*/mlir::Value{}, triplets, + /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt, + shape, lenParamsLeft); + auto left = hlfir::Entity{leftDesignate.getResult()}; + + llvm::SmallVector lenParamsRight; + auto rightEntity = hlfir::Entity{v2DeclareOp.getBase()}; + hlfir::genLengthParameters(loc, builder, rightEntity, lenParamsLeft); + auto rightDesignate = builder.create( + loc, v2DeclareOp.getBase().getType(), v2DeclareOp.getBase(), + /*component=*/"", + /*componentShape=*/mlir::Value{}, triplets, + /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt, + shape, lenParamsRight); + auto right = hlfir::Entity{rightDesignate.getResult()}; + + llvm::SmallVector typeParams; + auto genKernel = [&builder, &loc, op, seqTy, &left, &right]( + mlir::Location l, fir::FirOpBuilder &b, + mlir::ValueRange oneBasedIndices) -> hlfir::Entity { + auto leftElement = hlfir::getElementAt(l, b, left, oneBasedIndices); + auto rightElement = hlfir::getElementAt(l, b, right, oneBasedIndices); + auto leftVal = hlfir::loadTrivialScalar(l, b, leftElement); + auto rightVal = hlfir::loadTrivialScalar(l, b, rightElement); + return hlfir::Entity{genScalarCombiner( + builder, loc, op, seqTy.getEleTy(), leftVal, rightVal)}; + }; + mlir::Value elemental = hlfir::genElementalOp( + loc, builder, seqTy.getEleTy(), shape, typeParams, genKernel, + /*isUnordered=*/true); + builder.create(loc, elemental, v1DeclareOp.getBase()); + return; + } if (allConstantBound) { // Use the constant bound directly in the combiner region so they do not // need to be passed as block argument. 
@@ -1108,7 +1178,6 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder.create(loc, res, addr1); builder.setInsertionPointAfter(loops[0]); } else if (auto boxTy = mlir::dyn_cast(ty)) { - llvm::SmallVector tripletArgs; mlir::Type innerTy = extractSequenceType(boxTy); fir::SequenceType seqTy = mlir::dyn_cast_or_null(innerTy); @@ -1160,8 +1229,20 @@ mlir::acc::ReductionRecipeOp Fortran::lower::createOrGetReductionRecipe( mlir::OpBuilder modBuilder(mod.getBodyRegion()); auto recipe = modBuilder.create(loc, recipeName, ty, op); + llvm::SmallVector initArgsTy{ty}; + llvm::SmallVector initArgsLoc{loc}; + mlir::Type refTy = fir::unwrapRefType(ty); + if (auto seqTy = mlir::dyn_cast_or_null(refTy)) { + if (seqTy.hasDynamicExtents()) { + mlir::Type idxTy = builder.getIndexType(); + for (unsigned i = 0; i < seqTy.getDimension(); ++i) { + initArgsTy.push_back(idxTy); + initArgsLoc.push_back(loc); + } + } + } builder.createBlock(&recipe.getInitRegion(), recipe.getInitRegion().end(), - {ty}, {loc}); + initArgsTy, initArgsLoc); builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); mlir::Value initValue = genReductionInitRegion(builder, loc, ty, op); builder.create(loc, initValue); diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90 index 07979445394d9..b874d5219625d 100644 --- a/flang/test/Lower/OpenACC/acc-reduction.f90 +++ b/flang/test/Lower/OpenACC/acc-reduction.f90 @@ -3,6 +3,35 @@ ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefixes=CHECK,FIR ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s --check-prefixes=CHECK,HLFIR +! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_UxUxf32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: index, %[[ARG2:.*]]: index): +! HLFIR: %[[CST:.*]] = arith.constant -1.401300e-45 : f32 +! HLFIR: %[[SHAPE:.*]] = fir.shape %arg1, %arg2 : (index, index) -> !fir.shape<2> +! 
HLFIR: %[[TEMP:.*]] = fir.alloca !fir.array, %arg1, %arg2 +! HLFIR: %[[DECL:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = "acc.reduction.init"} : (!fir.ref>, !fir.shape<2>) -> (!fir.box>, !fir.ref>) +! HLFIR: hlfir.assign %[[CST]] to %[[DECL]]#0 : f32, !fir.box> +! HLFIR: acc.yield %[[DECL]]#0 : !fir.box> +! CHECK: } combiner { +! CHECK: ^bb0(%[[V1:.*]]: !fir.ref>, %[[V2:.*]]: !fir.ref>, %[[LB0:.*]]: index, %[[UB0:.*]]: index, %[[STEP0:.*]]: index, %[[LB1:.*]]: index, %[[UB1:.*]]: index, %[[STEP1:.*]]: index): +! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}}, %{{.*}} : (index, index) -> !fir.shape<2> +! HLFIR: %[[DECL_V1:.*]]:2 = hlfir.declare %[[V1]](%[[SHAPE]]) {uniq_name = ""} : (!fir.ref>, !fir.shape<2>) -> (!fir.box>, !fir.ref>) +! HLFIR: %[[DECL_V2:.*]]:2 = hlfir.declare %[[V2]](%[[SHAPE]]) {uniq_name = ""} : (!fir.ref>, !fir.shape<2>) -> (!fir.box>, !fir.ref>) +! HLFIR: %[[DES_V1:.*]] = hlfir.designate %[[DECL_V1]]#0 (%arg2:%arg3:%arg4, %arg5:%arg6:%arg7) shape %10 : (!fir.box>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box> +! HLFIR: %[[DES_V2:.*]] = hlfir.designate %[[DECL_V2]]#0 (%arg2:%arg3:%arg4, %arg5:%arg6:%arg7) shape %10 : (!fir.box>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box> +! HLFIR: %[[ELEMENTAL:.*]] = hlfir.elemental %[[SHAPE]] unordered : (!fir.shape<2>) -> !hlfir.expr { +! HLFIR: ^bb0(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index): +! HLFIR: %[[D1:.*]] = hlfir.designate %13 (%[[ARG0]], %[[ARG1]]) : (!fir.box>, index, index) -> !fir.ref +! HLFIR: %[[D2:.*]] = hlfir.designate %14 (%[[ARG0]], %[[ARG1]]) : (!fir.box>, index, index) -> !fir.ref +! HLFIR: %[[LOAD1:.*]] = fir.load %[[D1]] : !fir.ref +! HLFIR: %[[LOAD2:.*]] = fir.load %[[D2]] : !fir.ref +! HLFIR: %[[CMP:.*]] = arith.cmpf ogt, %[[LOAD1]], %[[LOAD2]] : f32 +! HLFIR: %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD1]], %[[LOAD2]] : f32 +! HLFIR: hlfir.yield_element %[[SELECT]] : f32 +! HLFIR: } +! 
HLFIR: hlfir.assign %[[ELEMENTAL]] to %[[DECL_V1]]#0 : !hlfir.expr, !fir.box> +! HLFIR: acc.yield %[[V1]] : !fir.ref> +! CHECK: } + ! CHECK-LABEL: acc.reduction.recipe @reduction_max_box_ptr_Uxf32 : !fir.box>> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.box>>): ! CHECK: } combiner { @@ -290,8 +319,8 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_max_section_ext100_ref_100xf32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[INIT:.*]] = arith.constant -1.401300e-45 : f32 -! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xf32> ! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xf32> ! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.reduction.init"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[LB:.*]] = arith.constant 0 : index ! CHECK: %[[UB:.*]] = arith.constant 99 : index @@ -338,8 +367,8 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_max_section_ext100xext10_ref_100x10xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%arg0: !fir.ref>): ! CHECK: %[[INIT:.*]] = arith.constant -2147483648 : i32 -! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xi32> ! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}}, %{{.*}} : (index, index) -> !fir.shape<2> +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xi32> ! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.reduction.init"} : (!fir.ref>, !fir.shape<2>) -> (!fir.ref>, !fir.ref>) ! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> ! CHECK: } combiner { @@ -384,8 +413,8 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_min_section_ext100xext10_ref_100x10xf32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[INIT:.*]] = arith.constant 3.40282347E+38 : f32 -! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xf32> ! 
HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}}, %{{.*}} : (index, index) -> !fir.shape<2> +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xf32> ! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.reduction.init"} : (!fir.ref>, !fir.shape<2>) -> (!fir.ref>, !fir.ref>) ! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> ! CHECK: } combiner { @@ -430,8 +459,8 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_min_section_ext100_ref_100xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[INIT:.*]] = arith.constant 2147483647 : i32 -! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32> ! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32> ! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.reduction.init"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> ! CHECK: } combiner { @@ -487,8 +516,8 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_mul_section_ext100_ref_100xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[INIT:.*]] = arith.constant 1 : i32 -! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32> ! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32> ! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.reduction.init"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> ! CHECK: } combiner { @@ -526,8 +555,8 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_add_section_ext100_ref_100xf32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[INIT:.*]] = arith.constant 0.000000e+00 : f32 -! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xf32> ! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> +! 
CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xf32> ! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.reduction.init"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> ! CHECK: } combiner { @@ -565,8 +594,8 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_add_section_ext100xext10xext2_ref_100x10x2xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[INIT:.*]] = arith.constant 0 : i32 -! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10x2xi32> ! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}}, %{{.*}}, %{{.*}} : (index, index, index) -> !fir.shape<3> +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10x2xi32> ! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.reduction.init"} : (!fir.ref>, !fir.shape<3>) -> (!fir.ref>, !fir.ref>) ! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> ! CHECK: } combiner { @@ -598,8 +627,8 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_add_section_ext100xext10_ref_100x10xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[INIT:.*]] = arith.constant 0 : i32 -! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xi32> ! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}}, %{{.*}} : (index, index) -> !fir.shape<2> +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xi32> ! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.reduction.init"} : (!fir.ref>, !fir.shape<2>) -> (!fir.ref>, !fir.ref>) ! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> ! CHECK: } combiner { @@ -626,8 +655,8 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_add_section_ext100_ref_100xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[INIT:.*]] = arith.constant 0 : i32 -! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32> ! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> +! 
CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32> ! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.reduction.init"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! HFLIR: acc.yield %[[DECLARE]]#0 : !fir.ref> ! CHECK: } combiner { @@ -1134,13 +1163,13 @@ subroutine acc_reduction_add_dynamic_extent_add(a) ! HLFIR: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "a"} ! HLFIR: acc.parallel reduction(@reduction_add_box_Uxi32 -> %[[RED:.*]] : !fir.ref>) -subroutine acc_reduction_add_dynamic_extent_max(a) +subroutine acc_reduction_add_assumed_shape_max(a) real :: a(:) !$acc parallel reduction(max:a) !$acc end parallel end subroutine -! CHECK-LABEL: func.func @_QPacc_reduction_add_dynamic_extent_max( +! CHECK-LABEL: func.func @_QPacc_reduction_add_assumed_shape_max( ! CHECK-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "a"}) ! HLFIR: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]] ! HLFIR: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "a"} @@ -1189,3 +1218,16 @@ subroutine acc_reduction_add_pointer_array(a) ! HLFIR: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box>>) -> !fir.ptr> ! HLFIR: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ptr>) bounds(%[[BOUND]]) -> !fir.ptr> {name = "a"} ! HLFIR: acc.parallel reduction(@reduction_max_box_ptr_Uxf32 -> %[[RED]] : !fir.ptr>) + +subroutine acc_reduction_max_dynamic_extent_max(a, n) + integer :: n + real :: a(n, n) + !$acc parallel reduction(max:a) + !$acc end parallel +end subroutine + +! CHECK-LABEL: func.func @_QPacc_reduction_max_dynamic_extent_max( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref> {fir.bindc_name = "a"}, %{{.*}}: !fir.ref {fir.bindc_name = "n"}) +! HLFIR: %[[DECL_A:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) {uniq_name = "_QFacc_reduction_max_dynamic_extent_maxEa"} : (!fir.ref>, !fir.shape<2>) -> (!fir.box>, !fir.ref>) +! 
HLFIR: %[[RED:.*]] = acc.reduction varPtr(%[[DECL_A]]#1 : !fir.ref>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref> {name = "a"} +! HLFIR: acc.parallel reduction(@reduction_max_ref_UxUxf32 -> %[[RED]] : !fir.ref>) From d9568bd4aa46c10fcef823b29c7db649fe69d9e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 16 Oct 2023 12:51:01 -0700 Subject: [PATCH 254/720] [flang][openacc] Support array with dynamic extents in firstprivate recipe (#69026) Add lowering support for array with dynamic extents in the firstprivate recipe. Generalize the lowering so static shaped arrays and array with dynamic extents use the same path. Some cleaning code is taken from #68836 that is not landed yet. --- flang/lib/Lower/OpenACC.cpp | 62 +++++++++++-------- .../test/Lower/OpenACC/acc-parallel-loop.f90 | 27 -------- flang/test/Lower/OpenACC/acc-private.f90 | 60 ++++++++++++------ mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 2 +- mlir/test/Dialect/OpenACC/invalid.mlir | 12 ---- 5 files changed, 77 insertions(+), 86 deletions(-) diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 49db55047ff02..faa5164f52573 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -569,8 +569,20 @@ mlir::acc::FirstprivateRecipeOp Fortran::lower::createOrGetFirstprivateRecipe( mlir::OpBuilder modBuilder(mod.getBodyRegion()); auto recipe = modBuilder.create(loc, recipeName, ty); + llvm::SmallVector initArgsTy{ty}; + llvm::SmallVector initArgsLoc{loc}; + auto refTy = fir::unwrapRefType(ty); + if (auto seqTy = mlir::dyn_cast_or_null(refTy)) { + if (seqTy.hasDynamicExtents()) { + mlir::Type idxTy = builder.getIndexType(); + for (unsigned i = 0; i < seqTy.getDimension(); ++i) { + initArgsTy.push_back(idxTy); + initArgsLoc.push_back(loc); + } + } + } builder.createBlock(&recipe.getInitRegion(), recipe.getInitRegion().end(), - {ty}, 
{loc}); + initArgsTy, initArgsLoc); builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); genPrivateLikeInitRegion(builder, recipe, ty, loc); @@ -601,32 +613,28 @@ mlir::acc::FirstprivateRecipeOp Fortran::lower::createOrGetFirstprivateRecipe( builder.create(loc, initValue, recipe.getCopyRegion().front().getArgument(1)); } else if (auto seqTy = mlir::dyn_cast_or_null(ty)) { - if (seqTy.hasDynamicExtents()) - TODO(loc, "firstprivate recipe of array with dynamic extents"); - mlir::Type idxTy = builder.getIndexType(); - mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); - mlir::Value arraySrc = recipe.getCopyRegion().front().getArgument(0); - mlir::Value arrayDst = recipe.getCopyRegion().front().getArgument(1); - llvm::SmallVector loops; - llvm::SmallVector ivs; - for (auto ext : llvm::reverse(seqTy.getShape())) { - auto lb = builder.create( - loc, idxTy, builder.getIntegerAttr(idxTy, 0)); - auto ub = builder.create( - loc, idxTy, builder.getIntegerAttr(idxTy, ext - 1)); - auto step = builder.create( - loc, idxTy, builder.getIntegerAttr(idxTy, 1)); - auto loop = builder.create(loc, lb, ub, step, - /*unordered=*/false); - builder.setInsertionPointToStart(loop.getBody()); - loops.push_back(loop); - ivs.push_back(loop.getInductionVar()); - } - auto addr1 = builder.create(loc, refTy, arraySrc, ivs); - auto addr2 = builder.create(loc, refTy, arrayDst, ivs); - auto loadedValue = builder.create(loc, addr1); - builder.create(loc, loadedValue, addr2); - builder.setInsertionPointAfter(loops[0]); + fir::FirOpBuilder firBuilder{builder, recipe.getOperation()}; + auto shape = genShapeFromBoundsOrArgs( + loc, firBuilder, seqTy, bounds, recipe.getCopyRegion().getArguments()); + + auto leftDeclOp = builder.create( + loc, recipe.getCopyRegion().getArgument(0), llvm::StringRef{}, shape, + llvm::ArrayRef{}, fir::FortranVariableFlagsAttr{}); + auto rightDeclOp = builder.create( + loc, recipe.getCopyRegion().getArgument(1), llvm::StringRef{}, shape, + llvm::ArrayRef{}, 
fir::FortranVariableFlagsAttr{}); + + hlfir::DesignateOp::Subscripts triplets = + getSubscriptsFromArgs(recipe.getCopyRegion().getArguments()); + auto leftEntity = hlfir::Entity{leftDeclOp.getBase()}; + auto left = + genDesignateWithTriplets(firBuilder, loc, leftEntity, triplets, shape); + auto rightEntity = hlfir::Entity{rightDeclOp.getBase()}; + auto right = + genDesignateWithTriplets(firBuilder, loc, rightEntity, triplets, shape); + + firBuilder.create(loc, left, right); + } else if (auto boxTy = mlir::dyn_cast_or_null(ty)) { fir::FirOpBuilder firBuilder{builder, recipe.getOperation()}; llvm::SmallVector tripletArgs; diff --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90 index 9a27a359e80b7..80b1272bd1b10 100644 --- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90 @@ -3,33 +3,6 @@ ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefixes=CHECK,FIR ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s --check-prefixes=CHECK,HLFIR -! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_ext10_ref_10xf32 : !fir.ref> init { -! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> -! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<10xf32> -! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.private.init"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> -! CHECK: } copy { -! CHECK: ^bb0(%[[SRC:.*]]: !fir.ref>, %[[DST:.*]]: !fir.ref>): -! CHECK: %[[LB0:.*]] = arith.constant 0 : index -! CHECK: %[[UB0:.*]] = arith.constant 9 : index -! CHECK: %[[STEP0:.*]] = arith.constant 1 : index -! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] { -! CHECK: %[[COORD0:.*]] = fir.coordinate_of %[[SRC]], %[[IV0]] : (!fir.ref>, index) -> !fir.ref -! 
CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[DST]], %[[IV0]] : (!fir.ref>, index) -> !fir.ref -! CHECK: %[[LOAD:.*]] = fir.load %[[COORD0]] : !fir.ref -! CHECK: fir.store %[[LOAD]] to %[[COORD1]] : !fir.ref -! CHECK: } -! CHECK: acc.terminator -! CHECK: } - -! CHECK-LABEL: acc.private.recipe @privatization_ref_10xf32 : !fir.ref> init { -! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> -! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]](%[[SHAPE]]) {uniq_name = "acc.private.init"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> -! CHECK: } - ! CHECK-LABEL: func.func @_QPacc_parallel_loop() subroutine acc_parallel_loop diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90 index 10c1bfc7c3802..9ce1828e63ddf 100644 --- a/flang/test/Lower/OpenACC/acc-private.f90 +++ b/flang/test/Lower/OpenACC/acc-private.f90 @@ -3,6 +3,23 @@ ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefixes=CHECK,FIR ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s --check-prefixes=CHECK,HLFIR +! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_UxUx2xi32 : !fir.ref> init { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index): +! HLFIR: %[[SHAPE:.*]] = fir.shape %[[ARG1]], %[[ARG2]], %[[ARG3]] : (index, index, index) -> !fir.shape<3> +! HLFIR: %[[TEMP:.*]] = fir.alloca !fir.array, %[[ARG1]], %[[ARG2]], %[[ARG3]] +! HLFIR: %[[DECL:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = "acc.private.init"} : (!fir.ref>, !fir.shape<3>) -> (!fir.box>, !fir.ref>) +! HLFIR: acc.yield %[[DECL]]#0 : !fir.box> +! CHECK: } copy { +! 
CHECK: ^bb0(%[[SRC:.*]]: !fir.ref>, %[[DST:.*]]: !fir.ref>, %[[LB0:.*]]: index, %[[UB0:.*]]: index, %[[STEP0:.*]]: index, %[[LB1:.*]]: index, %[[UB1:.*]]: index, %[[STEP1:.*]]: index, %[[LB2:.*]]: index, %[[UB2:.*]]: index, %[[STEP2:.*]]: index): +! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}}, %{{.*}}, %{{.*}} : (index, index, index) -> !fir.shape<3> +! HLFIR: %[[DECL_SRC:.*]]:2 = hlfir.declare %[[SRC]](%[[SHAPE]]) {uniq_name = ""} : (!fir.ref>, !fir.shape<3>) -> (!fir.box>, !fir.ref>) +! HLFIR: %[[DECL_DST:.*]]:2 = hlfir.declare %[[DST]](%[[SHAPE]]) {uniq_name = ""} : (!fir.ref>, !fir.shape<3>) -> (!fir.box>, !fir.ref>) +! HLFIR: %[[DES_SRC:.*]] = hlfir.designate %[[DECL_SRC]]#0 (%[[LB0]]:%[[UB0]]:%[[STEP0]], %[[LB1]]:%[[UB1]]:%[[STEP1]], %[[LB2]]:%[[UB2]]:%[[STEP2]]) shape %[[SHAPE]] : (!fir.box>, index, index, index, index, index, index, index, index, index, !fir.shape<3>) -> !fir.box> +! HLFIR: %[[DES_DST:.*]] = hlfir.designate %[[DECL_DST]]#0 (%[[LB0]]:%[[UB0]]:%[[STEP0]], %[[LB1]]:%[[UB1]]:%[[STEP1]], %[[LB2]]:%[[UB2]]:%[[STEP2]]) shape %[[SHAPE]] : (!fir.box>, index, index, index, index, index, index, index, index, index, !fir.shape<3>) -> !fir.box> +! HLFIR: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.box>, !fir.box> +! HLFIR: acc.terminator +! CHECK: } + ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb4.ub9_box_Uxi32 : !fir.box> init { ! CHECK: ^bb0(%{{.*}}: !fir.box>): ! CHECK: } copy { @@ -87,16 +104,12 @@ ! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> ! CHECK: } copy { ! CHECK: ^bb0(%[[SRC:.*]]: !fir.ref>, %[[DST:.*]]: !fir.ref>): -! CHECK: %[[LB0:.*]] = arith.constant 0 : index -! CHECK: %[[UB0:.*]] = arith.constant 49 : index -! CHECK: %[[STEP0:.*]] = arith.constant 1 : index -! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] { -! CHECK: %[[COORD0:.*]] = fir.coordinate_of %[[SRC]], %[[IV0]] : (!fir.ref>, index) -> !fir.ref -! 
CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[DST]], %[[IV0]] : (!fir.ref>, index) -> !fir.ref -! CHECK: %[[VALUE:.*]] = fir.load %[[COORD0]] : !fir.ref -! CHECK: fir.store %[[VALUE]] to %[[COORD1]] : !fir.ref -! CHECK: } -! CHECK: acc.terminator +! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> +! HLFIR: %[[DECL_SRC:.*]]:2 = hlfir.declare %[[SRC]](%[[SHAPE]]) {uniq_name = ""} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! HLFIR: %[[DECL_DST:.*]]:2 = hlfir.declare %[[DST]](%[[SHAPE]]) {uniq_name = ""} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! HLFIR: %[[DES_SRC:.*]] = hlfir.designate %[[DECL_SRC]]#0 shape %[[SHAPE:.*]] : (!fir.ref>, !fir.shape<1>) -> !fir.ref> +! HLFIR: %[[DES_DST:.*]] = hlfir.designate %[[DECL_DST]]#0 shape %[[SHAPE:.*]] : (!fir.ref>, !fir.shape<1>) -> !fir.ref> +! HLFIR: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.ref>, !fir.ref> ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_ext100_ref_100xf32 : !fir.ref> init { @@ -107,15 +120,12 @@ ! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.ref> ! CHECK: } copy { ! CHECK: ^bb0(%[[SRC:.*]]: !fir.ref>, %[[DST:.*]]: !fir.ref>): -! CHECK: %[[LB0:.*]] = arith.constant 0 : index -! CHECK: %[[UB0:.*]] = arith.constant 99 : index -! CHECK: %[[STEP1:.*]] = arith.constant 1 : index -! CHECK: fir.do_loop %[[IV0:.*]] = %c0 to %c99 step %c1 { -! CHECK: %[[COORD0:.*]] = fir.coordinate_of %[[SRC]], %[[IV0]] : (!fir.ref>, index) -> !fir.ref -! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[DST]], %[[IV0]] : (!fir.ref>, index) -> !fir.ref -! CHECK: %[[VALUE:.*]] = fir.load %[[COORD0]] : !fir.ref -! CHECK: fir.store %[[VALUE]] to %[[COORD1]] : !fir.ref -! CHECK: } +! HLFIR: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> +! HLFIR: %[[DECL_SRC:.*]]:2 = hlfir.declare %[[SRC]](%[[SHAPE]]) {uniq_name = ""} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! 
HLFIR: %[[DECL_DST:.*]]:2 = hlfir.declare %[[DST]](%[[SHAPE]]) {uniq_name = ""} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! HLFIR: %[[DES_SRC:.*]] = hlfir.designate %[[DECL_SRC]]#0 shape %[[SHAPE]] : (!fir.ref>, !fir.shape<1>) -> !fir.ref> +! HLFIR: %[[DES_DST:.*]] = hlfir.designate %[[DECL_DST]]#0 shape %[[SHAPE]] : (!fir.ref>, !fir.shape<1>) -> !fir.ref> +! HLFIR: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.ref>, !fir.ref> ! CHECK: acc.terminator ! CHECK: } @@ -337,3 +347,15 @@ subroutine acc_firstprivate_assumed_shape_with_section(a, n) a(i) = i end do end subroutine + +subroutine acc_firstprivate_dynamic_extent(a, n) + integer :: n, i + integer :: a(n, n, 2) + + !$acc parallel loop firstprivate(a) + do i = 1, n + a(i, i, 1) = i + end do +end subroutine + +! CHECK: acc.parallel firstprivate(@firstprivatization_ref_UxUx2xi32 -> %{{.*}} : !fir.ref>) diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index cea93b8a2ca8c..b7e2aec6a4e6a 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -452,7 +452,7 @@ LogicalResult acc::PrivateRecipeOp::verifyRegions() { LogicalResult acc::FirstprivateRecipeOp::verifyRegions() { if (failed(verifyInitLikeSingleArgRegion(*this, getInitRegion(), "privatization", "init", getType(), - /*verifyYield=*/true))) + /*verifyYield=*/false))) return failure(); if (getCopyRegion().empty()) diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index 225a8766fc550..ff92eab478bb4 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -312,18 +312,6 @@ acc.firstprivate.recipe @privatization_i32 : !llvm.ptr init { // ----- -// expected-error@+1 {{expects init region to yield a value of the privatization type}} -acc.firstprivate.recipe @privatization_i32 : !llvm.ptr init { -^bb0(%arg0 : !llvm.ptr): - %c1 = arith.constant 1 : i32 - %c0 = arith.constant 0 : 
i32 - %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr - llvm.store %c0, %0 : !llvm.ptr - acc.yield %0 : !llvm.ptr -} copy {} - -// ----- - // expected-error@+1 {{expects non-empty copy region}} acc.firstprivate.recipe @privatization_i32 : !llvm.ptr init { ^bb0(%arg0 : !llvm.ptr): From b3fbb67379a4e67d54d7693e88c05697d01a9a5f Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Mon, 16 Oct 2023 12:55:09 -0700 Subject: [PATCH 255/720] [mlir][sparse] cleanup of COO (#69239) Moves typedef to only file where it is used. Removes some deadcode. Some minor doc changes. --- .../mlir/ExecutionEngine/SparseTensor/COO.h | 21 +++++-------------- .../ExecutionEngine/SparseTensor/Storage.h | 5 +++++ 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h index f6eb45defcc1c..721e9bc69adac 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h @@ -58,21 +58,13 @@ struct ElementLT final { const uint64_t rank; }; -/// The type of callback functions which receive an element. -template -using ElementConsumer = - const std::function &, V)> &; - /// A memory-resident sparse tensor in coordinate-scheme representation -/// (a collection of `Element`s). This data structure is used as -/// an intermediate representation; e.g., for reading sparse tensors -/// from external formats into memory, or for certain conversions between -/// different `SparseTensorStorage` formats. +/// (a collection of `Element`s). This data structure is used as an +/// intermediate representation, e.g., for reading sparse tensors from +/// external formats into memory. 
template class SparseTensorCOO final { public: - using const_iterator = typename std::vector>::const_iterator; - /// Constructs a new coordinate-scheme sparse tensor with the given /// sizes and an optional initial storage capacity. explicit SparseTensorCOO(const std::vector &dimSizes, @@ -106,7 +98,7 @@ class SparseTensorCOO final { /// Returns the `operator<` closure object for the COO's element type. ElementLT getElementLT() const { return ElementLT(getRank()); } - /// Adds an element to the tensor. This method invalidates all iterators. + /// Adds an element to the tensor. void add(const std::vector &dimCoords, V val) { const uint64_t *base = coordinates.data(); const uint64_t size = coordinates.size(); @@ -135,12 +127,9 @@ class SparseTensorCOO final { elements.push_back(addedElem); } - const_iterator begin() const { return elements.cbegin(); } - const_iterator end() const { return elements.cend(); } - /// Sorts elements lexicographically by coordinates. If a coordinate /// is mapped to multiple values, then the relative order of those - /// values is unspecified. This method invalidates all iterators. + /// values is unspecified. void sort() { if (isSorted) return; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 5e57facaf2376..c5be3d1acc337 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -37,6 +37,11 @@ namespace mlir { namespace sparse_tensor { +/// The type of callback functions which receive an element. +template +using ElementConsumer = + const std::function &, V)> &; + // Forward references. 
template class SparseTensorEnumeratorBase; From 8e2b2c4181506efc5b9321c203dd107bbd63392b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 16 Oct 2023 12:50:29 -0700 Subject: [PATCH 256/720] [SLP]Fix PR69196: Instruction does not dominate all uses During emission of the postponed gathers, need to insert them before user instruction to avoid use before definition crash. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 2 +- .../non-scheduled-inst-reused-as-last-inst.ll | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 6a9bdc26bc88f..1482d83bad4f6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11214,7 +11214,7 @@ Value *BoUpSLP::vectorizeTree( TE->VectorizedValue = nullptr; auto *UserI = cast(TE->UserTreeIndices.front().UserTE->VectorizedValue); - Builder.SetInsertPoint(PrevVec); + Builder.SetInsertPoint(UserI); Builder.SetCurrentDebugLocation(UserI->getDebugLoc()); Value *Vec = vectorizeTree(TE); PrevVec->replaceAllUsesWith(Vec); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll new file mode 100644 index 0000000000000..3a9eca2bf2e6b --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define void @foo() { +; CHECK-LABEL: define void @foo() { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 0, i32 0 +; CHECK-NEXT: br label 
[[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 +; CHECK-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]]) +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb4: +; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]] +; CHECK: bb5: +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ] +; CHECK-NEXT: ret void +; +bb: + br label %bb1 + +bb1: + %phi = phi i32 [ 0, %bb ], [ %or, %bb4 ] + %phi2 = phi i32 [ 0, %bb ], [ %or3, %bb4 ] + %and = and i32 0, 0 + %shl = shl i32 %phi, %and + %or = or i32 %shl, 0 + %call = call i64 null(i32 %or) + %or3 = or i32 %phi2, 0 + br label %bb4 + +bb4: + br i1 false, label %bb5, label %bb1 + +bb5: + %phi6 = phi i32 [ %shl, %bb4 ] + %phi7 = phi i32 [ %or3, %bb4 ] + ret void +} From 528b5e6743db4307fada9e379f31e028132dae4d Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 16 Oct 2023 13:04:18 -0700 Subject: [PATCH 257/720] Revert "[gn build] Add rules for crtbegin/end (#66012)" This reverts commit e2e32f091a903a57c9fd8778c88488d32330ca6e. Breaks bots, e.g. 
http://45.33.8.238/linux/120748/step_4.txt --- .../gn/secondary/compiler-rt/lib/BUILD.gn | 5 +-- .../compiler-rt/lib/builtins/BUILD.gn | 32 ------------------- 2 files changed, 1 insertion(+), 36 deletions(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn index 398b95a06b805..d8c75a01c6945 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn @@ -5,10 +5,7 @@ group("lib") { "//compiler-rt/lib/cfi:ignorelist($host_toolchain)", ] if (current_os == "linux") { - deps += [ - "//compiler-rt/lib/builtins:crt", - "//compiler-rt/lib/msan", - ] + deps += [ "//compiler-rt/lib/msan" ] } if (current_os == "linux" || current_os == "android") { deps += [ "//compiler-rt/lib/ubsan_minimal" ] diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index a45795d194c61..303a6c29d7b91 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -574,38 +574,6 @@ static_library("builtins") { deps = lse_targets } -if (current_os == "linux") { - source_set("crt_src") { - sources = [ - "crtbegin.c", - "crtend.c", - ] - cflags = [ - "-std=c11", - "-fPIC", - ] - } - copy("crtbegin") { - # TODO: use get_target_outputs if it ever works with source_set to avoid hardcoding crt_src.crtbegin.o - input_dir = get_label_info(":crt_src", "target_out_dir") - sources = [ "$input_dir/crt_src.crtbegin.o" ] - outputs = [ "$crt_current_out_dir/clang_rt.crtbegin.o" ] - deps = [ ":crt_src" ] - } - copy("crtend") { - input_dir = get_label_info(":crt_src", "target_out_dir") - sources = [ "$input_dir/crt_src.crtend.o" ] - outputs = [ "$crt_current_out_dir/clang_rt.crtend.o" ] - deps = [ ":crt_src" ] - } - group("crt") { - deps = [ - ":crtbegin", - ":crtend", - ] - } -} - # Currently unused but necessary to make 
sync_source_lists_from_cmake.py happy. source_set("_unused") { sources = [ From 4bf10f3da7ab32d70d5c7c43b7705c06c108d326 Mon Sep 17 00:00:00 2001 From: akirchhoff-modular Date: Mon, 16 Oct 2023 13:14:17 -0700 Subject: [PATCH 258/720] [YAMLTraits] Fix std::optional input on empty documents (#68947) When the input document is non-empty, `mapOptional` works as expected, setting `std::optional` to `std::nullopt` when the field is not present. When the input document is empty, we hit a special case inside of `Input::preflightKey` that results in `UseDefault = false`, which results in the `std::optional` erroneously being set to a non-nullopt value. `preflightKey` is changed to set `UseDefault = true` in this case to make the behavior consistent between empty and non-empty documents. --- llvm/lib/Support/YAMLTraits.cpp | 2 ++ llvm/unittests/Support/YAMLIOTest.cpp | 3 +++ 2 files changed, 5 insertions(+) diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp index 9325a09faaea0..4aaf59be2ce50 100644 --- a/llvm/lib/Support/YAMLTraits.cpp +++ b/llvm/lib/Support/YAMLTraits.cpp @@ -156,6 +156,8 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault, if (!CurrentNode) { if (Required) EC = make_error_code(errc::invalid_argument); + else + UseDefault = true; return false; } diff --git a/llvm/unittests/Support/YAMLIOTest.cpp b/llvm/unittests/Support/YAMLIOTest.cpp index 90c09ed7f79ee..745d743b2b244 100644 --- a/llvm/unittests/Support/YAMLIOTest.cpp +++ b/llvm/unittests/Support/YAMLIOTest.cpp @@ -2392,6 +2392,7 @@ TEST(YAMLIO, TestMalformedMapFailsGracefully) { struct OptionalTest { std::vector Numbers; + std::optional MaybeNumber; }; struct OptionalTestSeq { @@ -2405,6 +2406,7 @@ namespace yaml { struct MappingTraits { static void mapping(IO& IO, OptionalTest &OT) { IO.mapOptional("Numbers", OT.Numbers); + IO.mapOptional("MaybeNumber", OT.MaybeNumber); } }; @@ -2466,6 +2468,7 @@ TEST(YAMLIO, 
TestEmptyStringSucceedsForMapWithOptionalFields) { Input yin(""); yin >> doc; EXPECT_FALSE(yin.error()); + EXPECT_FALSE(doc.MaybeNumber.has_value()); } TEST(YAMLIO, TestEmptyStringSucceedsForSequence) { From 6f41510d4f4848ca4dde203d24bae26587be1f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 16 Oct 2023 23:16:25 +0300 Subject: [PATCH 259/720] [llvm-rc] Accept filenames provided as multiple string literals (#68881) GNU windres supports this, while MS rc.exe doesn't. MS rc.exe only supports treating consecutive string literals as if they were fused into one in a few fixed locations (most of which are already supported), while GNU windres supports this essentially anywhere in any string. See b989fcbae6f179ad887d19ceef83ace1c00b87cc for one recent change that extended support for this in one specific resource. A reasonable use case for multiple concatenated string literals that GNU windres accepts is `1 ICON DIR "/name.ico"`, where the directory is provided via the preprocessor, expanding to another string literal; this is https://github.com/llvm/llvm-project/issues/51286. Extend the parser to try to consume all consecutive string tokens, whenever reading a filename. Adjust the handling of user data resources read from a file to use the readFilename() helper. While this probably doesn't cover every single case where GNU windres might accept concatenated string literals, this is the primary missing case that has been reported so far. 
--- llvm/test/tools/llvm-rc/Inputs/split-path.rc | 2 ++ llvm/test/tools/llvm-rc/split-path.test | 7 ++++++ llvm/tools/llvm-rc/ResourceScriptParser.cpp | 26 +++++++++++++++++--- llvm/tools/llvm-rc/ResourceScriptParser.h | 4 +++ 4 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 llvm/test/tools/llvm-rc/Inputs/split-path.rc create mode 100644 llvm/test/tools/llvm-rc/split-path.test diff --git a/llvm/test/tools/llvm-rc/Inputs/split-path.rc b/llvm/test/tools/llvm-rc/Inputs/split-path.rc new file mode 100644 index 0000000000000..fb510e89698f7 --- /dev/null +++ b/llvm/test/tools/llvm-rc/Inputs/split-path.rc @@ -0,0 +1,2 @@ +100 ICON "subdir" "/icon-new.ico" +101 24 "subdir" "/empty.manifest" diff --git a/llvm/test/tools/llvm-rc/split-path.test b/llvm/test/tools/llvm-rc/split-path.test new file mode 100644 index 0000000000000..a12fd2bc32c11 --- /dev/null +++ b/llvm/test/tools/llvm-rc/split-path.test @@ -0,0 +1,7 @@ +; RUN: rm -rf %t +; RUN: mkdir %t +; RUN: cd %t +; RUN: mkdir subdir +; RUN: cp %p/Inputs/icon-new.ico subdir +; RUN: touch subdir/empty.manifest +; RUN: llvm-windres --no-preprocess %p/Inputs/split-path.rc %t/split-path.res diff --git a/llvm/tools/llvm-rc/ResourceScriptParser.cpp b/llvm/tools/llvm-rc/ResourceScriptParser.cpp index 9e1047448831b..4f02fa502d24f 100644 --- a/llvm/tools/llvm-rc/ResourceScriptParser.cpp +++ b/llvm/tools/llvm-rc/ResourceScriptParser.cpp @@ -238,7 +238,24 @@ Expected RCParser::readString() { Expected RCParser::readFilename() { if (!isNextTokenKind(Kind::String) && !isNextTokenKind(Kind::Identifier)) return getExpectedError("string"); - return read().value(); + const RCToken &Token = read(); + StringRef Str = Token.value(); + if (Token.kind() != Kind::String) + return Str; + while (isNextTokenKind(Kind::String)) { + const RCToken &NextToken = read(); + StringRef Next = NextToken.value(); + bool IsWide = Str.consume_front_insensitive("L"); + Next.consume_front_insensitive("L"); + bool StrUnquoted = 
Str.consume_front("\"") && Str.consume_back("\""); + bool NextUnquoted = Next.consume_front("\"") && Next.consume_back("\""); + assert(StrUnquoted && NextUnquoted); + (void)StrUnquoted; + (void)NextUnquoted; + + Str = Saver.save(Twine(IsWide ? "L" : "") + "\"" + Str + Next + "\""); + } + return Str; } Expected RCParser::readIdentifier() { @@ -499,9 +516,10 @@ RCParser::ParseType RCParser::parseUserDefinedResource(IntOrString Type) { // Check if this is a file resource. switch (look().kind()) { case Kind::String: - case Kind::Identifier: - return std::make_unique(Type, read().value(), - MemoryFlags); + case Kind::Identifier: { + ASSIGN_OR_RETURN(Filename, readFilename()); + return std::make_unique(Type, *Filename, MemoryFlags); + } default: break; } diff --git a/llvm/tools/llvm-rc/ResourceScriptParser.h b/llvm/tools/llvm-rc/ResourceScriptParser.h index 5c01cec0f151e..603afd8d73fb1 100644 --- a/llvm/tools/llvm-rc/ResourceScriptParser.h +++ b/llvm/tools/llvm-rc/ResourceScriptParser.h @@ -18,6 +18,7 @@ #include "ResourceScriptToken.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" #include @@ -185,6 +186,9 @@ class RCParser { std::vector Tokens; LocIter CurLoc; const LocIter End; + + BumpPtrAllocator Alloc; + StringSaver Saver{Alloc}; }; } // namespace rc From 750c8e39de3c132f4600b0351cb743abbfd14fed Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 13:23:31 -0700 Subject: [PATCH 260/720] =?UTF-8?q?[flang][runtime]=20Handle=20incomplete?= =?UTF-8?q?=20NAMELIST=20input=20derived=20type=20compon=E2=80=A6=20(#6683?= =?UTF-8?q?1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ent list When a derived type value appears in NAMELIST input, its components' values appear in sequence. 
This sequence can be truncated by a NAME= that begins the next NAMELIST input item, or by the terminal '/' that ends the NAMELIST group. Extend the mechanism already in place for truncated array item lists in NAMELIST input so that it also applies to derived type component sequences, and rename things appropriately. --- flang/runtime/descriptor-io.h | 6 +++++- flang/runtime/io-stmt.h | 9 ++++----- flang/runtime/namelist.cpp | 11 +++++++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/flang/runtime/descriptor-io.h b/flang/runtime/descriptor-io.h index 840d73b8e857c..e20f31e9b4431 100644 --- a/flang/runtime/descriptor-io.h +++ b/flang/runtime/descriptor-io.h @@ -288,7 +288,11 @@ static bool DefaultComponentwiseIO(IoStatementState &io, *compArray.Element(at)}; if (!DefaultComponentIO

( io, component, descriptor, subscripts, handler, table)) { - return false; + // Truncated nonempty namelist input sequence? + auto *listInput{ + io.get_if>()}; + return DIR == Direction::Input && (j > 0 || k > 0) && listInput && + listInput->inNamelistSequence(); } } } diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index fa432d07a680d..d4ceb83265246 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -295,8 +295,7 @@ template <> class ListDirectedStatementState : public FormattedIoStatementState { public: - bool inNamelistArray() const { return inNamelistArray_; } - void set_inNamelistArray(bool yes = true) { inNamelistArray_ = yes; } + bool inNamelistSequence() const { return inNamelistSequence_; } // Skips value separators, handles repetition and null values. // Vacant when '/' appears; present with descriptor == ListDirectedNullValue @@ -308,11 +307,11 @@ class ListDirectedStatementState // input statement. This member function resets some state so that // repetition and null values work correctly for each successive // NAMELIST input item. - void ResetForNextNamelistItem(bool inNamelistArray) { + void ResetForNextNamelistItem(bool inNamelistSequence) { remaining_ = 0; eatComma_ = false; realPart_ = imaginaryPart_ = false; - inNamelistArray_ = inNamelistArray; + inNamelistSequence_ = inNamelistSequence; } private: @@ -322,7 +321,7 @@ class ListDirectedStatementState bool hitSlash_{false}; // once '/' is seen, nullify further items bool realPart_{false}; bool imaginaryPart_{false}; - bool inNamelistArray_{false}; + bool inNamelistSequence_{false}; }; template diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp index 1b3207ef2f932..61815a7cc8a40 100644 --- a/flang/runtime/namelist.cpp +++ b/flang/runtime/namelist.cpp @@ -522,15 +522,18 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { } io.HandleRelativePosition(byteCount); // Read the values into the descriptor. An array can be short. 
- listInput->ResetForNextNamelistItem(useDescriptor->rank() > 0); if (const auto *addendum{useDescriptor->Addendum()}; addendum && addendum->derivedType()) { const NonTbpDefinedIoTable *table{group.nonTbpDefinedIo}; + listInput->ResetForNextNamelistItem(/*inNamelistSequence=*/true); if (!IONAME(InputDerivedType)(cookie, *useDescriptor, table)) { return false; } - } else if (!descr::DescriptorIO(io, *useDescriptor)) { - return false; + } else { + listInput->ResetForNextNamelistItem(useDescriptor->rank() > 0); + if (!descr::DescriptorIO(io, *useDescriptor)) { + return false; + } } next = io.GetNextNonBlank(byteCount); if (next && *next == comma) { @@ -549,7 +552,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { bool IsNamelistNameOrSlash(IoStatementState &io) { if (auto *listInput{ io.get_if>()}) { - if (listInput->inNamelistArray()) { + if (listInput->inNamelistSequence()) { SavedPosition savedPosition{io}; std::size_t byteCount{0}; if (auto ch{io.GetNextNonBlank(byteCount)}) { From 119b0f3895688173e262aaceaf90be8b303194f3 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 16 Oct 2023 13:28:48 -0700 Subject: [PATCH 261/720] Revert "[SLP]Fix PR69196: Instruction does not dominate all uses" This reverts commit 8e2b2c4181506efc5b9321c203dd107bbd63392b to fix a crash reported in https://lab.llvm.org/buildbot/#/builders/230/builds/19993. 
--- .../Transforms/Vectorize/SLPVectorizer.cpp | 2 +- .../non-scheduled-inst-reused-as-last-inst.ll | 45 ------------------- 2 files changed, 1 insertion(+), 46 deletions(-) delete mode 100644 llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1482d83bad4f6..6a9bdc26bc88f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11214,7 +11214,7 @@ Value *BoUpSLP::vectorizeTree( TE->VectorizedValue = nullptr; auto *UserI = cast(TE->UserTreeIndices.front().UserTE->VectorizedValue); - Builder.SetInsertPoint(UserI); + Builder.SetInsertPoint(PrevVec); Builder.SetCurrentDebugLocation(UserI->getDebugLoc()); Value *Vec = vectorizeTree(TE); PrevVec->replaceAllUsesWith(Vec); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll deleted file mode 100644 index 3a9eca2bf2e6b..0000000000000 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll +++ /dev/null @@ -1,45 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s - -define void @foo() { -; CHECK-LABEL: define void @foo() { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 0, i32 0 -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> -; CHECK-NEXT: 
[[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> -; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 -; CHECK-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]]) -; CHECK-NEXT: br label [[BB4]] -; CHECK: bb4: -; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]] -; CHECK: bb5: -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ] -; CHECK-NEXT: ret void -; -bb: - br label %bb1 - -bb1: - %phi = phi i32 [ 0, %bb ], [ %or, %bb4 ] - %phi2 = phi i32 [ 0, %bb ], [ %or3, %bb4 ] - %and = and i32 0, 0 - %shl = shl i32 %phi, %and - %or = or i32 %shl, 0 - %call = call i64 null(i32 %or) - %or3 = or i32 %phi2, 0 - br label %bb4 - -bb4: - br i1 false, label %bb5, label %bb1 - -bb5: - %phi6 = phi i32 [ %shl, %bb4 ] - %phi7 = phi i32 [ %or3, %bb4 ] - ret void -} From 8a47ad4b67edfe0f1e5a84742cbbd6fee975a1dc Mon Sep 17 00:00:00 2001 From: michaelrj-google <71531609+michaelrj-google@users.noreply.github.com> Date: Mon, 16 Oct 2023 13:32:34 -0700 Subject: [PATCH 262/720] [libc] Add simple long double to printf float fuzz (#68449) Recent testing has uncovered some hard-to-find bugs in printf's long double support. This patch adds an extra long double path to the fuzzer with minimal extra effort. While a more thorough long double fuzzer would be useful, it would need to handle the non-standard cases of 80 bit long doubles such as unnormal and pseudo-denormal numbers. For that reason, a standalone long double fuzzer is left for future development. 
--- libc/fuzzing/stdio/printf_float_conv_fuzz.cpp | 30 +++++++++++++++---- .../stdio/printf_core/float_hex_converter.h | 5 ++-- libc/test/src/stdio/sprintf_test.cpp | 3 ++ 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/libc/fuzzing/stdio/printf_float_conv_fuzz.cpp b/libc/fuzzing/stdio/printf_float_conv_fuzz.cpp index dd3902eebda61..798e1a3866fdd 100644 --- a/libc/fuzzing/stdio/printf_float_conv_fuzz.cpp +++ b/libc/fuzzing/stdio/printf_float_conv_fuzz.cpp @@ -29,6 +29,14 @@ inline bool simple_streq(char *first, char *second, int length) { return true; } +inline int simple_strlen(const char *str) { + int i = 0; + for (; *str; ++str, ++i) { + ; + } + return i; +} + enum class TestResult { Success, BufferSizeFailed, @@ -36,7 +44,8 @@ enum class TestResult { StringsNotEqual, }; -inline TestResult test_vals(const char *fmt, double num, int prec, int width) { +template +inline TestResult test_vals(const char *fmt, F num, int prec, int width) { // Call snprintf on a nullptr to get the buffer size. int buffer_size = LIBC_NAMESPACE::snprintf(nullptr, 0, fmt, width, prec, num); @@ -70,10 +79,7 @@ inline TestResult test_vals(const char *fmt, double num, int prec, int width) { } constexpr char const *fmt_arr[] = { - "%*.*f", - "%*.*e", - "%*.*g", - "%*.*a", + "%*.*f", "%*.*e", "%*.*g", "%*.*a", "%*.*Lf", "%*.*Le", "%*.*Lg", "%*.*La", }; extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { @@ -100,6 +106,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { num = LIBC_NAMESPACE::fputil::FPBits(raw_num).get_val(); + // While we could create a "ld_raw_num" from additional bytes, it's much + // easier to stick with simply casting num to long double. This avoids the + // issues around 80 bit long doubles, especially unnormal and pseudo-denormal + // numbers, which MPFR doesn't handle well. 
+ long double ld_num = static_cast(num); + if (width > MAX_SIZE) { width = MAX_SIZE; } else if (width < -MAX_SIZE) { @@ -114,7 +126,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { for (size_t cur_fmt = 0; cur_fmt < sizeof(fmt_arr) / sizeof(char *); ++cur_fmt) { - TestResult result = test_vals(fmt_arr[cur_fmt], num, prec, width); + int fmt_len = simple_strlen(fmt_arr[cur_fmt]); + TestResult result; + if (fmt_arr[cur_fmt][fmt_len - 2] == 'L') { + result = test_vals(fmt_arr[cur_fmt], ld_num, prec, width); + } else { + result = test_vals(fmt_arr[cur_fmt], num, prec, width); + } if (result != TestResult::Success) { __builtin_trap(); } diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h index e264af9844bd2..6a980a74d4a6f 100644 --- a/libc/src/stdio/printf_core/float_hex_converter.h +++ b/libc/src/stdio/printf_core/float_hex_converter.h @@ -75,8 +75,9 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, // This is to handle situations where the mantissa isn't an even number of hex // digits. This is primarily relevant for x86 80 bit long doubles, which have - // 63 bit mantissas. - if (mantissa_width % BITS_IN_HEX_DIGIT != 0) { + // 63 bit mantissas. In the case where the mantissa is 0, however, the + // exponent should stay as 0. 
+ if (mantissa_width % BITS_IN_HEX_DIGIT != 0 && mantissa > 0) { exponent -= mantissa_width % BITS_IN_HEX_DIGIT; } diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp index b7e8b75485881..f3d5dd698cbea 100644 --- a/libc/test/src/stdio/sprintf_test.cpp +++ b/libc/test/src/stdio/sprintf_test.cpp @@ -748,6 +748,9 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.5a", nan); ASSERT_STREQ_LEN(written, buff, "nan"); + written = LIBC_NAMESPACE::sprintf(buff, "%La", 0.0L); + ASSERT_STREQ_LEN(written, buff, "0x0p+0"); + written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0.1L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xc.dp-7"); From b7de1d07e5298bdd97816043360ea334378f5565 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 16 Oct 2023 15:39:53 -0500 Subject: [PATCH 263/720] [Clang][NFC] Use correct tool name for NVIDIA's 'nvlink' Summary: This step was incorrectly called 'fatbinary', so if it failed here it would say 'fatbinary' was the cause of the failure. This is actually 'nvlink' so we should adjust this. --- clang/lib/Driver/ToolChains/Cuda.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h index 39df6e06fb26d..f7c0c7ea1c98c 100644 --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -110,7 +110,7 @@ class LLVM_LIBRARY_VISIBILITY FatBinary : public Tool { // Runs nvlink, which links GPU object files ("cubin" files) into a single file. 
class LLVM_LIBRARY_VISIBILITY Linker : public Tool { public: - Linker(const ToolChain &TC) : Tool("NVPTX::Linker", "fatbinary", TC) {} + Linker(const ToolChain &TC) : Tool("NVPTX::Linker", "nvlink", TC) {} bool hasIntegratedCPP() const override { return false; } From 511236e07436c7469c63b4a23610439f0a2405c6 Mon Sep 17 00:00:00 2001 From: Hui Date: Mon, 16 Oct 2023 21:49:37 +0100 Subject: [PATCH 264/720] [libc++][test] Add `stop_token` benchmark (#69117) This is transforming the `stop_token` benchmark that Lewis Baker had created into Google Bench https://reviews.llvm.org/D154702 --- libcxx/benchmarks/CMakeLists.txt | 1 + libcxx/benchmarks/stop_token.bench.cpp | 108 +++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 libcxx/benchmarks/stop_token.bench.cpp diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index 80b2663fd8086..7591f34d938bf 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -213,6 +213,7 @@ set(BENCHMARK_TESTS map.bench.cpp monotonic_buffer.bench.cpp ordered_set.bench.cpp + stop_token.bench.cpp std_format_spec_string_unicode.bench.cpp string.bench.cpp stringstream.bench.cpp diff --git a/libcxx/benchmarks/stop_token.bench.cpp b/libcxx/benchmarks/stop_token.bench.cpp new file mode 100644 index 0000000000000..293d55ed82a08 --- /dev/null +++ b/libcxx/benchmarks/stop_token.bench.cpp @@ -0,0 +1,108 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "make_test_thread.h" + +using namespace std::chrono_literals; + +// We have a single thread created by std::jthread consuming the stop_token: +// registering/deregistering callbacks, one at a time. +void BM_stop_token_single_thread_reg_unreg_callback(benchmark::State& state) { + auto thread_func = [&](std::stop_token st, std::atomic* reg_count) { + while (!st.stop_requested()) { + std::stop_callback cb{st, [&]() noexcept {}}; + benchmark::DoNotOptimize(cb); + reg_count->fetch_add(1, std::memory_order_relaxed); + } + }; + + std::atomic reg_count(0); + std::uint64_t total_reg_test_param = state.range(0); + + auto thread = support::make_test_jthread(thread_func, ®_count); + + for (auto _ : state) { + auto start_total = reg_count.load(std::memory_order_relaxed); + + while (reg_count.load(std::memory_order_relaxed) - start_total < total_reg_test_param) { + std::this_thread::yield(); + } + } +} +BENCHMARK(BM_stop_token_single_thread_reg_unreg_callback)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); + +// At startup, it creates a single stop_source which it will then pass an associated stop_token to every +// request. +// +// Assume a thread-pool handles these requests and for each request it polls for stop_requested(), then attaches a +// stop-callback, does some work, then detaches the stop-callback some time later. The lifetime of requests/callbacks +// would overlap with other requests/callback from the same thread. 
+// +// Say something like each thread keeping a circular buffer of N stop-callbacks and destroying the stop-callbacks in +// FIFO order +void BM_stop_token_async_reg_unreg_callback(benchmark::State& state) { + struct dummy_stop_callback { + void operator()() const noexcept {} + }; + + constexpr size_t thread_count = 20; + constexpr size_t concurrent_request_count = 1000; + std::atomic start{false}; + + std::uint64_t total_reg_test_param = state.range(0); + + std::stop_source ss; + std::vector threads; + threads.reserve(thread_count); + std::vector> reg_counts(thread_count); + + auto thread_func = [&start](std::atomic* count, std::stop_token st) { + std::vector>> cbs(concurrent_request_count); + + start.wait(false); + + std::uint32_t index = 0; + while (!st.stop_requested()) { + cbs[index].emplace(st, dummy_stop_callback{}); + index = (index + 1) % concurrent_request_count; + count->fetch_add(1, std::memory_order_relaxed); + } + }; + + for (size_t i = 0; i < thread_count; ++i) { + threads.emplace_back(support::make_test_jthread(thread_func, ®_counts[i], ss.get_token())); + } + + auto get_total_reg = [&] { + std::uint64_t total = 0; + for (const auto& reg_counts : reg_counts) { + total += reg_counts.load(std::memory_order_relaxed); + } + return total; + }; + + start = true; + start.notify_all(); + + for (auto _ : state) { + auto start_total = get_total_reg(); + + while (get_total_reg() - start_total < total_reg_test_param) { + std::this_thread::yield(); + } + } + + ss.request_stop(); +} +BENCHMARK(BM_stop_token_async_reg_unreg_callback)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); + +BENCHMARK_MAIN(); From c5b617c5e53d7af81c621d200e2cd03324538541 Mon Sep 17 00:00:00 2001 From: Piotr Zegar Date: Mon, 16 Oct 2023 20:03:52 +0000 Subject: [PATCH 265/720] [clang-tidy][NFC] Clarify documentation for misc-definitions-in-headers Add information about what fixes are provided by the check, and how to enable them. 
Issue: #55093 --- .../docs/clang-tidy/checks/misc/definitions-in-headers.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/definitions-in-headers.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/definitions-in-headers.rst index 08aa9d884c239..9c90bf10217f4 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/definitions-in-headers.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/definitions-in-headers.rst @@ -88,6 +88,11 @@ from multiple translation units. template constexpr T pi = T(3.1415926L); +When :program:`clang-tidy` is invoked with the `--fix-notes` option, this check +provides fixes that automatically add the ``inline`` keyword to discovered +functions. Please note that the addition of the ``inline`` keyword to variables +is not currently supported by this check. + Options ------- From ea7e50cdf2f531d323a564590a22c7bb6e11aa3a Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 13:56:07 -0700 Subject: [PATCH 266/720] [flang][runtime] Implement EX editing for input & output (#67208) Support the EX edit descriptor for hexadecimal real formatted output and hexadecimal real input for all forms of formatted input.. (We're possibly the first Fortran compiler to support this feature for input editing; only one other can handle EX output editing.) As true (not BOZ) hexadecimal floating-point constants are not supported in Fortran source code, only in formatted input, the implementation takes place in the I/O editing portion of the runtime, not as new conversions in the Decimal library. 
--- flang/include/flang/Common/real.h | 7 + .../flang/Decimal/binary-floating-point.h | 59 ++++ flang/include/flang/Decimal/decimal.h | 8 - flang/runtime/edit-input.cpp | 319 +++++++++++++----- flang/runtime/edit-output.cpp | 165 ++++++++- flang/runtime/edit-output.h | 24 +- .../unittests/Runtime/NumericalFormatTest.cpp | 62 +++- 7 files changed, 510 insertions(+), 134 deletions(-) diff --git a/flang/include/flang/Common/real.h b/flang/include/flang/Common/real.h index 036f665d3da61..50aab7d89a597 100644 --- a/flang/include/flang/Common/real.h +++ b/flang/include/flang/Common/real.h @@ -63,6 +63,10 @@ static constexpr int MaxDecimalConversionDigits(int binaryPrecision) { } } +static constexpr int MaxHexadecimalConversionDigits(int binaryPrecision) { + return binaryPrecision >= 0 ? (binaryPrecision + 3) / 4 : binaryPrecision; +} + static constexpr int RealKindForPrecision(int binaryPrecision) { switch (binaryPrecision) { case 8: // IEEE single (truncated): 1+8+7 with implicit bit @@ -132,6 +136,9 @@ template class RealDetails { static constexpr int maxDecimalConversionDigits{ MaxDecimalConversionDigits(binaryPrecision)}; + static constexpr int maxHexadecimalConversionDigits{ + MaxHexadecimalConversionDigits(binaryPrecision)}; + static_assert(binaryPrecision > 0); static_assert(exponentBits > 1); static_assert(exponentBits <= 15); diff --git a/flang/include/flang/Decimal/binary-floating-point.h b/flang/include/flang/Decimal/binary-floating-point.h index 28346e71828fd..b9346a8585e2d 100644 --- a/flang/include/flang/Decimal/binary-floating-point.h +++ b/flang/include/flang/Decimal/binary-floating-point.h @@ -21,10 +21,19 @@ namespace Fortran::decimal { +enum FortranRounding { + RoundNearest, /* RN and RP */ + RoundUp, /* RU */ + RoundDown, /* RD */ + RoundToZero, /* RZ - no rounding */ + RoundCompatible, /* RC: like RN, but ties go away from 0 */ +}; + template class BinaryFloatingPointNumber : public common::RealDetails { public: using Details = common::RealDetails; + 
using Details::binaryPrecision; using Details::bits; using Details::decimalPrecision; using Details::decimalRange; @@ -33,6 +42,7 @@ class BinaryFloatingPointNumber : public common::RealDetails { using Details::isImplicitMSB; using Details::maxDecimalConversionDigits; using Details::maxExponent; + using Details::maxHexadecimalConversionDigits; using Details::significandBits; using RawType = common::HostUnsignedIntType; @@ -120,6 +130,55 @@ class BinaryFloatingPointNumber : public common::RealDetails { InsertExplicitMSB(); } + static constexpr BinaryFloatingPointNumber Infinity(bool isNegative) { + RawType result{RawType{maxExponent} << significandBits}; + if (isNegative) { + result |= RawType{1} << (bits - 1); + } + return BinaryFloatingPointNumber{result}; + } + + // Returns true when the result is exact + constexpr bool RoundToBits(int keepBits, enum FortranRounding mode) { + if (IsNaN() || IsInfinite() || keepBits >= binaryPrecision) { + return true; + } + int lostBits{binaryPrecision - keepBits}; + RawType lostMask{static_cast((RawType{1} << lostBits) - 1)}; + if (RawType lost{static_cast(raw_ & lostMask)}; lost != 0) { + bool increase{false}; + switch (mode) { + case RoundNearest: + if (lost >> (lostBits - 1) != 0) { // >= tie + if ((lost & (lostMask >> 1)) != 0) { + increase = true; // > tie + } else { + increase = ((raw_ >> lostBits) & 1) != 0; // tie to even + } + } + break; + case RoundUp: + increase = !IsNegative(); + break; + case RoundDown: + increase = IsNegative(); + break; + case RoundToZero: + break; + case RoundCompatible: + increase = lost >> (lostBits - 1) != 0; // >= tie + break; + } + if (increase) { + raw_ |= lostMask; + Next(); + } + return false; // inexact + } else { + return true; // exact + } + } + private: constexpr void RemoveExplicitMSB() { if constexpr (!isImplicitMSB) { diff --git a/flang/include/flang/Decimal/decimal.h b/flang/include/flang/Decimal/decimal.h index b9ac6b71cd03a..a4e0ee7c84746 100644 --- 
a/flang/include/flang/Decimal/decimal.h +++ b/flang/include/flang/Decimal/decimal.h @@ -43,14 +43,6 @@ struct ConversionToDecimalResult { enum ConversionResultFlags flags; }; -enum FortranRounding { - RoundNearest, /* RN and RP */ - RoundUp, /* RU */ - RoundDown, /* RD */ - RoundToZero, /* RZ - no rounding */ - RoundCompatible, /* RC: like RN, but ties go away from 0 */ -}; - /* The "minimize" flag causes the fewest number of output digits * to be emitted such that reading them back into the same binary * floating-point format with RoundNearest will return the same diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp index 1861c9f8499b0..4e8c9aa868a69 100644 --- a/flang/runtime/edit-input.cpp +++ b/flang/runtime/edit-input.cpp @@ -125,7 +125,7 @@ static bool EditBOZInput( return CheckCompleteListDirectedField(io, edit); } -static inline char32_t GetDecimalPoint(const DataEdit &edit) { +static inline char32_t GetRadixPointChar(const DataEdit &edit) { return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'}; } @@ -229,17 +229,22 @@ bool EditIntegerInput( // Parses a REAL input number from the input source as a normalized // fraction into a supplied buffer -- there's an optional '-', a -// decimal point, and at least one digit. The adjusted exponent value -// is returned in a reference argument. The returned value is the number -// of characters that (should) have been written to the buffer -- this can -// be larger than the buffer size and can indicate overflow. Replaces -// blanks with zeroes if appropriate. -static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, - const DataEdit &edit, int &exponent) { +// decimal point when the input is not hexadecimal, and at least one +// digit. Replaces blanks with zeroes where appropriate. 
+struct ScannedRealInput { + // Number of characters that (should) have been written to the + // buffer -- this can be larger than the buffer size, which + // indicates buffer overflow. Zero indicates an error. + int got{0}; + int exponent{0}; // adjusted as necessary; binary if isHexadecimal + bool isHexadecimal{false}; // 0X... +}; +static ScannedRealInput ScanRealInput( + char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) { std::optional remaining; std::optional next; int got{0}; - std::optional decimalPoint; + std::optional radixPointOffset; auto Put{[&](char ch) -> void { if (got < bufferSize) { buffer[got] = ch; @@ -251,6 +256,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, Put('-'); } bool bzMode{(edit.modes.editingFlags & blankZero) != 0}; + int exponent{0}; if (!next || (!bzMode && *next == ' ')) { if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) { // An empty/blank field means zero when not list-directed. @@ -259,10 +265,11 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, // required to pass FCVS. Put('0'); } - return got; + return {got, exponent, false}; } - char32_t decimal{GetDecimalPoint(edit)}; + char32_t radixPointChar{GetRadixPointChar(edit)}; char32_t first{*next >= 'a' && *next <= 'z' ? 
*next + 'A' - 'a' : *next}; + bool isHexadecimal{false}; if (first == 'N' || first == 'I') { // NaN or infinity - convert to upper case // Subtle: a blank field of digits could be followed by 'E' or 'D', @@ -283,7 +290,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, if (depth == 0) { break; } else if (!next) { - return 0; // error + return {}; // error } else if (*next == '(') { ++depth; } else if (*next == ')') { @@ -292,34 +299,51 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, Put(*next); } } - exponent = 0; - } else if (first == decimal || (first >= '0' && first <= '9') || + } else if (first == radixPointChar || (first >= '0' && first <= '9') || (bzMode && (first == ' ' || first == '\t')) || first == 'E' || first == 'D' || first == 'Q') { - Put('.'); // input field is normalized to a fraction + if (first == '0') { + next = io.NextInField(remaining, edit); + if (next && (*next == 'x' || *next == 'X')) { // 0X... + isHexadecimal = true; + next = io.NextInField(remaining, edit); + } else { + Put('0'); + } + } + // input field is normalized to a fraction + if (!isHexadecimal) { + Put('.'); + } auto start{got}; for (; next; next = io.NextInField(remaining, edit)) { char32_t ch{*next}; if (ch == ' ' || ch == '\t') { - if (bzMode) { + if (isHexadecimal) { + return {}; // error + } else if (bzMode) { ch = '0'; // BZ mode - treat blank as if it were zero } else { - continue; + continue; // ignore blank in fixed field } } - if (ch == '0' && got == start && !decimalPoint) { - // omit leading zeroes before the decimal + if (ch == '0' && got == start && !radixPointOffset) { + // omit leading zeroes before the radix point } else if (ch >= '0' && ch <= '9') { Put(ch); - } else if (ch == decimal && !decimalPoint) { - // the decimal point is *not* copied to the buffer - decimalPoint = got - start; // # of digits before the decimal point + } else if (ch == radixPointChar && !radixPointOffset) { + // The radix 
point character is *not* copied to the buffer. + radixPointOffset = got - start; // # of digits before the radix point + } else if (isHexadecimal && ch >= 'A' && ch <= 'F') { + Put(ch); + } else if (isHexadecimal && ch >= 'a' && ch <= 'f') { + Put(ch - 'a' + 'A'); // normalize to capitals } else { break; } } if (got == start) { - // Nothing but zeroes and maybe a decimal point. F'2018 requires + // Nothing but zeroes and maybe a radix point. F'2018 requires // at least one digit, but F'77 did not, and a bare "." shows up in // the FCVS suite. Put('0'); // emit at least one digit @@ -328,17 +352,22 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, auto nextBeforeExponent{next}; auto startExponent{io.GetConnectionState().positionInRecord}; bool hasGoodExponent{false}; - if (next && - (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' || - *next == 'q' || *next == 'Q')) { - // Optional exponent letter. Blanks are allowed between the - // optional exponent letter and the exponent value. - io.SkipSpaces(remaining); - next = io.NextInField(remaining, edit); + if (next) { + if (isHexadecimal) { + if (*next == 'p' || *next == 'P') { + next = io.NextInField(remaining, edit); + } else { + // The binary exponent is not optional in the standard. + return {}; // error + } + } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' || + *next == 'q' || *next == 'Q') { + // Optional exponent letter. Blanks are allowed between the + // optional exponent letter and the exponent value. + io.SkipSpaces(remaining); + next = io.NextInField(remaining, edit); + } } - // The default exponent is -kP, but the scale factor doesn't affect - // an explicit exponent. 
- exponent = -edit.modes.scale; if (next && (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') || *next == ' ' || *next == '\t')) { @@ -346,14 +375,16 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, if (negExpo || *next == '+') { next = io.NextInField(remaining, edit); } - for (exponent = 0; next; next = io.NextInField(remaining, edit)) { + for (; next; next = io.NextInField(remaining, edit)) { if (*next >= '0' && *next <= '9') { hasGoodExponent = true; if (exponent < 10000) { exponent = 10 * exponent + *next - '0'; } } else if (*next == ' ' || *next == '\t') { - if (bzMode) { + if (isHexadecimal) { + break; + } else if (bzMode) { hasGoodExponent = true; exponent = 10 * exponent; } @@ -366,23 +397,29 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, } } if (!hasGoodExponent) { + if (isHexadecimal) { + return {}; // error + } // There isn't a good exponent; do not consume it. next = nextBeforeExponent; io.HandleAbsolutePosition(startExponent); - } - if (decimalPoint) { - exponent += *decimalPoint; + // The default exponent is -kP, but the scale factor doesn't affect + // an explicit exponent. + exponent = -edit.modes.scale; + } + // Adjust exponent by number of digits before the radix point. + if (isHexadecimal) { + // Exponents for hexadecimal input are binary. 
+ exponent += radixPointOffset.value_or(got - start) * 4; + } else if (radixPointOffset) { + exponent += *radixPointOffset; } else { - // When no decimal point (or comma) appears in the value, the 'd' + // When no radix point (or comma) appears in the value, the 'd' part of the edit descriptor must be interpreted as the number of digits in the value to be interpreted as being to the *right* of - // the assumed decimal point (13.7.2.3.2) + // the assumed radix point (13.7.2.3.2) exponent += got - start - edit.digits.value_or(0); } - } else { - // TODO: hex FP input - exponent = 0; - return 0; } // Consume the trailing ')' of a list-directed or NAMELIST complex // input value. @@ -403,10 +440,10 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, next = io.NextInField(remaining, edit); } if (next) { - return 0; // error: unused nonblank character in fixed-width field + return {}; // error: unused nonblank character in fixed-width field } } - return got; + return {got, exponent, isHexadecimal}; } static void RaiseFPExceptions(decimal::ConversionResultFlags flags) { @@ -433,7 +470,7 @@ static void RaiseFPExceptions(decimal::ConversionResultFlags flags) { // converter without modification, this fast path for real input // saves time by avoiding memory copies and reformatting of the exponent.
template -static bool TryFastPathRealInput( +static bool TryFastPathRealDecimalInput( IoStatementState &io, const DataEdit &edit, void *n) { if (edit.modes.editingFlags & (blankZero | decimalComma)) { return false; @@ -504,10 +541,103 @@ static bool TryFastPathRealInput( return true; } +template +decimal::ConversionToBinaryResult ConvertHexadecimal( + const char *&p, enum decimal::FortranRounding rounding, int expo) { + using RealType = decimal::BinaryFloatingPointNumber; + using RawType = typename RealType::RawType; + bool isNegative{*p == '-'}; + constexpr RawType one{1}; + RawType signBit{0}; + if (isNegative) { + ++p; + signBit = one << (RealType::bits - 1); + } + RawType fraction{0}; + // Adjust the incoming binary P+/- exponent to shift the radix point + // to below the LSB and add in the bias. + expo += binaryPrecision - 1 + RealType::exponentBias; + // Input the fraction. + int roundingBit{0}; + int guardBit{0}; + for (; *p; ++p) { + fraction <<= 4; + expo -= 4; + if (*p >= '0' && *p <= '9') { + fraction |= *p - '0'; + } else if (*p >= 'A' && *p <= 'F') { + fraction |= *p - 'A' + 10; // data were normalized to capitals + } else { + break; + } + while (fraction >> binaryPrecision) { + guardBit |= roundingBit; + roundingBit = (int)fraction & 1; + fraction >>= 1; + ++expo; + } + } + if (fraction) { + // Boost biased expo if too small + while (expo < 1) { + guardBit |= roundingBit; + roundingBit = (int)fraction & 1; + fraction >>= 1; + ++expo; + } + // Normalize + while (expo > 1 && !(fraction >> (binaryPrecision - 1))) { + fraction <<= 1; + --expo; + } + // Rounding + bool increase{false}; + switch (rounding) { + case decimal::RoundNearest: // RN & RP + increase = roundingBit && (guardBit | ((int)fraction & 1)); + break; + case decimal::RoundUp: // RU + increase = !isNegative && (roundingBit | guardBit); + break; + case decimal::RoundDown: // RD + increase = isNegative && (roundingBit | guardBit); + break; + case decimal::RoundToZero: // RZ + break; + case 
decimal::RoundCompatible: // RC + increase = roundingBit != 0; + break; + } + if (increase) { + ++fraction; + if (fraction >> binaryPrecision) { + fraction >>= 1; + ++expo; + } + } + } + // Package & return result + constexpr RawType significandMask{(one << RealType::significandBits) - 1}; + if (!fraction) { + expo = 0; + } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) { + expo = 0; // subnormal + } else if (expo >= RealType::maxExponent) { + expo = RealType::maxExponent; // +/-Inf + fraction = 0; + } else { + fraction &= significandMask; // remove explicit normalization unless x87 + } + return decimal::ConversionToBinaryResult{ + RealType{static_cast(signBit | + static_cast(expo) << RealType::significandBits | fraction)}, + (roundingBit | guardBit) ? decimal::Inexact : decimal::Exact}; +} + template bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) { constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)}; - if (TryFastPathRealInput(io, edit, n)) { + if (TryFastPathRealDecimalInput(io, edit, n)) { return CheckCompleteListDirectedField(io, edit); } // Fast path wasn't available or didn't work; go the more general route @@ -515,8 +645,8 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) { common::MaxDecimalConversionDigits(binaryPrecision)}; static constexpr int bufferSize{maxDigits + 18}; char buffer[bufferSize]; - int exponent{0}; - int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)}; + auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)}; + int got{scanned.got}; if (got >= maxDigits + 2) { io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small"); return false; @@ -529,48 +659,55 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) { static_cast(connection.currentRecordNumber)); return false; } - bool hadExtra{got > maxDigits}; - if (exponent != 0) { - buffer[got++] = 'e'; - if (exponent < 0) { - buffer[got++] = '-'; - 
exponent = -exponent; - } - if (exponent > 9999) { - exponent = 9999; // will convert to +/-Inf - } - if (exponent > 999) { - int dig{exponent / 1000}; - buffer[got++] = '0' + dig; - int rest{exponent - 1000 * dig}; - dig = rest / 100; - buffer[got++] = '0' + dig; - rest -= 100 * dig; - dig = rest / 10; - buffer[got++] = '0' + dig; - buffer[got++] = '0' + (rest - 10 * dig); - } else if (exponent > 99) { - int dig{exponent / 100}; - buffer[got++] = '0' + dig; - int rest{exponent - 100 * dig}; - dig = rest / 10; - buffer[got++] = '0' + dig; - buffer[got++] = '0' + (rest - 10 * dig); - } else if (exponent > 9) { - int dig{exponent / 10}; - buffer[got++] = '0' + dig; - buffer[got++] = '0' + (exponent - 10 * dig); - } else { - buffer[got++] = '0' + exponent; - } - } - buffer[got] = '\0'; + decimal::ConversionToBinaryResult converted; const char *p{buffer}; - decimal::ConversionToBinaryResult converted{ - decimal::ConvertToBinary(p, edit.modes.round)}; - if (hadExtra) { - converted.flags = static_cast( - converted.flags | decimal::Inexact); + if (scanned.isHexadecimal) { + buffer[got] = '\0'; + converted = ConvertHexadecimal( + p, edit.modes.round, scanned.exponent); + } else { + bool hadExtra{got > maxDigits}; + int exponent{scanned.exponent}; + if (exponent != 0) { + buffer[got++] = 'e'; + if (exponent < 0) { + buffer[got++] = '-'; + exponent = -exponent; + } + if (exponent > 9999) { + exponent = 9999; // will convert to +/-Inf + } + if (exponent > 999) { + int dig{exponent / 1000}; + buffer[got++] = '0' + dig; + int rest{exponent - 1000 * dig}; + dig = rest / 100; + buffer[got++] = '0' + dig; + rest -= 100 * dig; + dig = rest / 10; + buffer[got++] = '0' + dig; + buffer[got++] = '0' + (rest - 10 * dig); + } else if (exponent > 99) { + int dig{exponent / 100}; + buffer[got++] = '0' + dig; + int rest{exponent - 100 * dig}; + dig = rest / 10; + buffer[got++] = '0' + dig; + buffer[got++] = '0' + (rest - 10 * dig); + } else if (exponent > 9) { + int dig{exponent / 10}; + 
buffer[got++] = '0' + dig; + buffer[got++] = '0' + (exponent - 10 * dig); + } else { + buffer[got++] = '0' + exponent; + } + } + buffer[got] = '\0'; + converted = decimal::ConvertToBinary(p, edit.modes.round); + if (hadExtra) { + converted.flags = static_cast( + converted.flags | decimal::Inexact); + } } if (*p) { // unprocessed junk after value const auto &connection{io.GetConnectionState()}; diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp index be0bb07f08bfe..18b209bc6798c 100644 --- a/flang/runtime/edit-output.cpp +++ b/flang/runtime/edit-output.cpp @@ -205,13 +205,20 @@ const char *RealOutputEditingBase::FormatExponent( } else if (exponent == eEnd) { *--exponent = '0'; // Ew.dE0 with zero-valued exponent } - } else { // ensure at least two exponent digits + } else if (edit.variation == 'X') { + if (expo == 0) { + *--exponent = '0'; // EX without Ee and zero-valued exponent + } + } else { + // Ensure at least two exponent digits unless EX while (exponent + 2 > eEnd) { *--exponent = '0'; } } *--exponent = expo < 0 ? '-' : '+'; - if (edit.expoDigits || edit.IsListDirected() || exponent + 3 == eEnd) { + if (edit.variation == 'X') { + *--exponent = 'P'; + } else if (edit.expoDigits || edit.IsListDirected() || exponent + 3 == eEnd) { *--exponent = edit.descriptor == 'D' ? 
'D' : 'E'; // not 'G' or 'Q' } length = eEnd - exponent; @@ -251,19 +258,32 @@ bool RealOutputEditingBase::EmitSuffix(const DataEdit &edit) { } template -decimal::ConversionToDecimalResult RealOutputEditing::Convert( +decimal::ConversionToDecimalResult RealOutputEditing::ConvertToDecimal( int significantDigits, enum decimal::FortranRounding rounding, int flags) { auto converted{decimal::ConvertToDecimal(buffer_, sizeof buffer_, static_cast(flags), significantDigits, rounding, x_)}; if (!converted.str) { // overflow io_.GetIoErrorHandler().Crash( - "RealOutputEditing::Convert : buffer size %zd was insufficient", + "RealOutputEditing::ConvertToDecimal: buffer size %zd was insufficient", sizeof buffer_); } return converted; } +static bool IsInfOrNaN(const char *p, int length) { + if (!p || length < 1) { + return false; + } + if (*p == '-' || *p == '+') { + if (length == 1) { + return false; + } + ++p; + } + return *p == 'I' || *p == 'N'; +} + // 13.7.2.3.3 in F'2018 template bool RealOutputEditing::EditEorDOutput(const DataEdit &edit) { @@ -275,7 +295,6 @@ bool RealOutputEditing::EditEorDOutput(const DataEdit &edit) { if (edit.modes.editingFlags & signPlus) { flags |= decimal::AlwaysSign; } - bool noLeadingSpaces{editWidth == 0}; int scale{edit.modes.scale}; // 'kP' value if (editWidth == 0) { // "the processor selects the field width" if (edit.digits.has_value()) { // E0.d @@ -319,8 +338,8 @@ bool RealOutputEditing::EditEorDOutput(const DataEdit &edit) { // In EN editing, multiple attempts may be necessary, so this is a loop. while (true) { decimal::ConversionToDecimalResult converted{ - Convert(significantDigits, edit.modes.round, flags)}; - if (IsInfOrNaN(converted)) { + ConvertToDecimal(significantDigits, edit.modes.round, flags)}; + if (IsInfOrNaN(converted.str, static_cast(converted.length))) { return editWidth > 0 && converted.length > static_cast(editWidth) ? 
EmitRepeated(io_, '*', editWidth) @@ -380,7 +399,7 @@ bool RealOutputEditing::EditEorDOutput(const DataEdit &edit) { zeroesBeforePoint = 1; ++totalLength; } - if (totalLength < width && noLeadingSpaces) { + if (totalLength < width && editWidth == 0) { width = totalLength; } return EmitPrefix(edit, totalLength, width) && @@ -418,8 +437,8 @@ bool RealOutputEditing::EditFOutput(const DataEdit &edit) { bool canIncrease{true}; while (true) { decimal::ConversionToDecimalResult converted{ - Convert(extraDigits + fracDigits, rounding, flags)}; - if (IsInfOrNaN(converted)) { + ConvertToDecimal(extraDigits + fracDigits, rounding, flags)}; + if (IsInfOrNaN(converted.str, static_cast(converted.length))) { return editWidth > 0 && converted.length > static_cast(editWidth) ? EmitRepeated(io_, '*', editWidth) @@ -521,8 +540,8 @@ DataEdit RealOutputEditing::EditForGOutput(DataEdit edit) { flags |= decimal::AlwaysSign; } decimal::ConversionToDecimalResult converted{ - Convert(significantDigits, edit.modes.round, flags)}; - if (IsInfOrNaN(converted)) { + ConvertToDecimal(significantDigits, edit.modes.round, flags)}; + if (IsInfOrNaN(converted.str, static_cast(converted.length))) { return edit; // Inf/Nan -> Ew.d (same as Fw.d) } int expo{IsZero() ? 
1 : converted.decimalExponent}; // 's' @@ -549,8 +568,9 @@ DataEdit RealOutputEditing::EditForGOutput(DataEdit edit) { // 13.10.4 in F'2018 template bool RealOutputEditing::EditListDirectedOutput(const DataEdit &edit) { - decimal::ConversionToDecimalResult converted{Convert(1, edit.modes.round)}; - if (IsInfOrNaN(converted)) { + decimal::ConversionToDecimalResult converted{ + ConvertToDecimal(1, edit.modes.round)}; + if (IsInfOrNaN(converted.str, static_cast(converted.length))) { return EditEorDOutput(edit); } int expo{converted.decimalExponent}; @@ -567,11 +587,120 @@ bool RealOutputEditing::EditListDirectedOutput(const DataEdit &edit) { return EditFOutput(edit); } -// 13.7.5.2.6 in F'2018 +// 13.7.2.3.6 in F'2023 +// The specification for hexadecimal output, unfortunately for implementors, +// leaves as "implementation dependent" the choice of how to emit values +// with multiple hexadecimal output possibilities that are numerically +// equivalent. The one working implementation of EX output that I can find +// apparently chooses to frame the nybbles from most to least significant, +// rather than trying to minimize the magnitude of the binary exponent. +// E.g., 2. is edited into 0X8.0P-2 rather than 0X2.0P0. This implementation +// follows that precedent so as to avoid a gratuitous incompatibility. 
template -bool RealOutputEditing::EditEXOutput(const DataEdit &) { - io_.GetIoErrorHandler().Crash( - "not yet implemented: EX output editing"); // TODO +auto RealOutputEditing::ConvertToHexadecimal( + int significantDigits, enum decimal::FortranRounding rounding, int flags) + -> ConvertToHexadecimalResult { + if (x_.IsNaN() || x_.IsInfinite()) { + auto converted{ConvertToDecimal(significantDigits, rounding, flags)}; + return {converted.str, static_cast(converted.length), 0}; + } + x_.RoundToBits(4 * significantDigits, rounding); + if (x_.IsInfinite()) { // rounded away to +/-Inf + auto converted{ConvertToDecimal(significantDigits, rounding, flags)}; + return {converted.str, static_cast(converted.length), 0}; + } + int len{0}; + if (x_.IsNegative()) { + buffer_[len++] = '-'; + } else if (flags & decimal::AlwaysSign) { + buffer_[len++] = '+'; + } + auto fraction{x_.Fraction()}; + if (fraction == 0) { + buffer_[len++] = '0'; + return {buffer_, len, 0}; + } else { + // Ensure that the MSB is set. + int expo{x_.UnbiasedExponent() - 3}; + while (!(fraction >> (x_.binaryPrecision - 1))) { + fraction <<= 1; + --expo; + } + // This is initially the right shift count needed to bring the + // most-significant hexadecimal digit's bits into the LSBs. + // x_.binaryPrecision is constant, so / can be used for readability. 
+ int shift{x_.binaryPrecision - 4}; + typename BinaryFloatingPoint::RawType one{1}; + auto remaining{(one << shift) - one}; + for (int digits{0}; digits < significantDigits; ++digits) { + if ((flags & decimal::Minimize) && !(fraction & remaining)) { + break; + } + int hexDigit{0}; + if (shift >= 0) { + hexDigit = int(fraction >> shift) & 0xf; + } else if (shift >= -3) { + hexDigit = int(fraction << -shift) & 0xf; + } + if (hexDigit >= 10) { + buffer_[len++] = 'A' + hexDigit - 10; + } else { + buffer_[len++] = '0' + hexDigit; + } + shift -= 4; + remaining >>= 4; + } + return {buffer_, len, expo}; + } +} + +template +bool RealOutputEditing::EditEXOutput(const DataEdit &edit) { + addSpaceBeforeCharacter(io_); + int editDigits{edit.digits.value_or(0)}; // 'd' field + int significantDigits{editDigits + 1}; + int flags{0}; + if (edit.modes.editingFlags & signPlus) { + flags |= decimal::AlwaysSign; + } + int editWidth{edit.width.value_or(0)}; // 'w' field + if (editWidth == 0 && !edit.digits) { // EX0 (no .d) + flags |= decimal::Minimize; + significantDigits = 28; // enough for 128-bit F.P. + } + auto converted{ + ConvertToHexadecimal(significantDigits, edit.modes.round, flags)}; + if (IsInfOrNaN(converted.str, converted.length)) { + return editWidth > 0 && converted.length > editWidth + ? EmitRepeated(io_, '*', editWidth) + : (editWidth <= converted.length || + EmitRepeated(io_, ' ', editWidth - converted.length)) && + EmitAscii(io_, converted.str, converted.length); + } + int signLength{converted.length > 0 && + (converted.str[0] == '-' || converted.str[0] == '+') + ? 1 + : 0}; + int convertedDigits{converted.length - signLength}; + int expoLength{0}; + const char *exponent{FormatExponent(converted.exponent, edit, expoLength)}; + int trailingZeroes{flags & decimal::Minimize + ? 0 + : std::max(0, significantDigits - convertedDigits)}; + int totalLength{converted.length + trailingZeroes + expoLength + 3 /*0X.*/}; + int width{editWidth > 0 ? 
editWidth : totalLength}; + return totalLength > width || !exponent + ? EmitRepeated(io_, '*', width) + : EmitRepeated(io_, ' ', width - totalLength) && + EmitAscii(io_, converted.str, signLength) && + EmitAscii(io_, "0X", 2) && + EmitAscii(io_, converted.str + signLength, 1) && + EmitAscii( + io_, edit.modes.editingFlags & decimalComma ? "," : ".", 1) && + EmitAscii(io_, converted.str + signLength + 1, + converted.length - (signLength + 1)) && + EmitRepeated(io_, '0', trailingZeroes) && + EmitAscii(io_, exponent, expoLength); } template bool RealOutputEditing::Edit(const DataEdit &edit) { diff --git a/flang/runtime/edit-output.h b/flang/runtime/edit-output.h index 765e41f89827d..4e6d6b25b4dd2 100644 --- a/flang/runtime/edit-output.h +++ b/flang/runtime/edit-output.h @@ -38,20 +38,6 @@ class RealOutputEditingBase { protected: explicit RealOutputEditingBase(IoStatementState &io) : io_{io} {} - static bool IsInfOrNaN(const decimal::ConversionToDecimalResult &res) { - const char *p{res.str}; - if (!p || res.length < 1) { - return false; - } - if (*p == '-' || *p == '+') { - if (res.length == 1) { - return false; - } - ++p; - } - return *p < '0' || *p > '9'; - } - // Returns null when the exponent overflows a fixed-size output field. 
const char *FormatExponent(int, const DataEdit &edit, int &length); bool EmitPrefix(const DataEdit &, std::size_t length, std::size_t width); @@ -84,7 +70,15 @@ template class RealOutputEditing : public RealOutputEditingBase { bool IsZero() const { return x_.IsZero(); } - decimal::ConversionToDecimalResult Convert( + decimal::ConversionToDecimalResult ConvertToDecimal( + int significantDigits, enum decimal::FortranRounding, int flags = 0); + + struct ConvertToHexadecimalResult { + const char *str; + int length; + int exponent; + }; + ConvertToHexadecimalResult ConvertToHexadecimal( int significantDigits, enum decimal::FortranRounding, int flags = 0); BinaryFloatingPoint x_; diff --git a/flang/unittests/Runtime/NumericalFormatTest.cpp b/flang/unittests/Runtime/NumericalFormatTest.cpp index 833b16be0fc3f..219947fe4fbbb 100644 --- a/flang/unittests/Runtime/NumericalFormatTest.cpp +++ b/flang/unittests/Runtime/NumericalFormatTest.cpp @@ -290,6 +290,8 @@ TEST(IOApiTests, FormatZeroes) { {"(1P,G32.17,';')", " 0.0000000000000000 ;"}, {"(2P,E32.17,';')", " 00.0000000000000000E+00;"}, {"(-1P,E32.17,';')", " 0.00000000000000000E+00;"}, + {"(EX32.17,';')", " 0X0.00000000000000000P+0;"}, + {"(DC,EX32.17,';')", " 0X0,00000000000000000P+0;"}, {"(G0,';')", "0.;"}, }; @@ -321,6 +323,8 @@ TEST(IOApiTests, FormatOnes) { {"(2P,G32.17,';')", " 1.0000000000000000 ;"}, {"(-1P,E32.17,';')", " 0.01000000000000000E+02;"}, {"(-1P,G32.17,';')", " 1.0000000000000000 ;"}, + {"(EX32.17,';')", " 0X8.00000000000000000P-3;"}, + {"(DC,EX32.17,';')", " 0X8,00000000000000000P-3;"}, {"(G0,';')", "1.;"}, }; @@ -337,6 +341,7 @@ TEST(IOApiTests, FormatNegativeOnes) { {"(E32.17,';')", " -0.10000000000000000E+01;"}, {"(F32.17,';')", " -1.00000000000000000;"}, {"(G32.17,';')", " -1.0000000000000000 ;"}, + {"(EX32.17,';')", " -0X8.00000000000000000P-3;"}, {"(G0,';')", "-1.;"}, }; for (auto const &[format, expect] : negOnes) { @@ -365,6 +370,7 @@ TEST(IOApiTests, FormatDoubleValues) { {"(G8.1,';')", " -0. 
;"}, {"(G0,';')", "-0.;"}, {"(E9.1,';')", " -0.0E+00;"}, + {"(EX9.1,';')", "-0X0.0P+0;"}, }}, {// +Inf 0x7ff0000000000000, @@ -372,9 +378,11 @@ TEST(IOApiTests, FormatDoubleValues) { {"(E9.1,';')", " Inf;"}, {"(F9.1,';')", " Inf;"}, {"(G9.1,';')", " Inf;"}, + {"(EX9.1,';')", " Inf;"}, {"(SP,E9.1,';')", " +Inf;"}, {"(SP,F9.1,';')", " +Inf;"}, {"(SP,G9.1,';')", " +Inf;"}, + {"(SP,EX9.1,';')", " +Inf;"}, {"(G0,';')", "Inf;"}, }}, {// -Inf @@ -383,6 +391,7 @@ TEST(IOApiTests, FormatDoubleValues) { {"(E9.1,';')", " -Inf;"}, {"(F9.1,';')", " -Inf;"}, {"(G9.1,';')", " -Inf;"}, + {"(EX9.1,';')", " -Inf;"}, {"(G0,';')", "-Inf;"}, }}, {// NaN @@ -391,6 +400,7 @@ TEST(IOApiTests, FormatDoubleValues) { {"(E9.1,';')", " NaN;"}, {"(F9.1,';')", " NaN;"}, {"(G9.1,';')", " NaN;"}, + {"(EX9.1,';')", " NaN;"}, {"(G0,';')", "NaN;"}, }}, {// NaN (sign irrelevant) @@ -402,6 +412,7 @@ TEST(IOApiTests, FormatDoubleValues) { {"(SP,E9.1,';')", " NaN;"}, {"(SP,F9.1,';')", " NaN;"}, {"(SP,G9.1,';')", " NaN;"}, + {"(SP,EX9.1,';')", " NaN;"}, {"(G0,';')", "NaN;"}, }}, {// 0.1 rounded @@ -429,6 +440,7 @@ TEST(IOApiTests, FormatDoubleValues) { {"(G0.55,';')", ".1000000000000000055511151231257827021181583404541015625;"}, {"(G0,';')", ".1;"}, + {"(EX20.12,';')", " 0XC.CCCCCCCCCCCDP-7;"}, }}, {// 1.5 0x3ff8000000000000, @@ -436,6 +448,7 @@ TEST(IOApiTests, FormatDoubleValues) { {"(E9.2,';')", " 0.15E+01;"}, {"(F4.1,';')", " 1.5;"}, {"(G7.1,';')", " 2. 
;"}, + {"(EX9.1,';')", " 0XC.0P-3;"}, {"(RN,E8.1,';')", " 0.2E+01;"}, {"(RN,F3.0,';')", " 2.;"}, {"(RN,G7.0,';')", " 0.E+01;"}, @@ -465,6 +478,7 @@ TEST(IOApiTests, FormatDoubleValues) { {"(RU,E8.1,';')", "-0.1E+01;"}, {"(RZ,E8.1,';')", "-0.1E+01;"}, {"(RC,E8.1,';')", "-0.2E+01;"}, + {"(EX9.1,';')", "-0XC.0P-3;"}, }}, {// 2.5 0x4004000000000000, @@ -475,6 +489,7 @@ TEST(IOApiTests, FormatDoubleValues) { {"(RU,E8.1,';')", " 0.3E+01;"}, {"(RZ,E8.1,';')", " 0.2E+01;"}, {"(RC,E8.1,';')", " 0.3E+01;"}, + {"(EX9.1,';')", " 0XA.0P-2;"}, }}, {// -2.5 0xc004000000000000, @@ -485,6 +500,7 @@ TEST(IOApiTests, FormatDoubleValues) { {"(RU,E8.1,';')", "-0.2E+01;"}, {"(RZ,E8.1,';')", "-0.2E+01;"}, {"(RC,E8.1,';')", "-0.3E+01;"}, + {"(EX9.1,';')", "-0XA.0P-2;"}, }}, {// least positive nonzero subnormal 1, @@ -583,6 +599,7 @@ TEST(IOApiTests, FormatDoubleValues) { "701797267771758512566055119913150489110145103786273816725095" "583738973359899366480994116420570263709027924276754456522908" "753868250641971826553344726563-323;"}, + {"(EX24.13,';')", " 0X8.0000000000000P-1077;"}, }}, {// least positive nonzero normal 0x10000000000000, @@ -603,6 +620,7 @@ TEST(IOApiTests, FormatDoubleValues) { "61364675687023986783152906809846172109246253967285156250-" "307;"}, {"(G0,';')", ".22250738585072014E-307;"}, + {"(EX24.13,';')", " 0X8.0000000000000P-1025;"}, }}, {// greatest finite 0x7fefffffffffffffuLL, @@ -633,6 +651,31 @@ TEST(IOApiTests, FormatDoubleValues) { "123348274797826204144723168738177180919299881250404026184124" "8583680000+306;"}, {"(G0,';')", ".17976931348623157E+309;"}, + {"(EX24.13,';')", " 0XF.FFFFFFFFFFFF8P+1020;"}, + }}, + {// EX rounding + 0x3ff1000000000000uLL, // 1.0625 + { + {"(F7.4,';')", " 1.0625;"}, + {"(EX9.1,';')", " 0X8.8P-3;"}, + {"(EX9.0,';')", " 0X8.P-3;"}, + {"(RN,EX9.0,';')", " 0X8.P-3;"}, + {"(RU,EX9.0,';')", " 0X9.P-3;"}, + {"(RD,EX9.0,';')", " 0X8.P-3;"}, + {"(RZ,EX9.0,';')", " 0X8.P-3;"}, + {"(RC,EX9.0,';')", " 0X9.P-3;"}, + }}, + {// EX rounding + 
0xbff1000000000000uLL, // -1.0625 + { + {"(F7.4,';')", "-1.0625;"}, + {"(EX9.1,';')", "-0X8.8P-3;"}, + {"(EX9.0,';')", " -0X8.P-3;"}, + {"(RN,EX9.0,';')", " -0X8.P-3;"}, + {"(RU,EX9.0,';')", " -0X8.P-3;"}, + {"(RD,EX9.0,';')", " -0X9.P-3;"}, + {"(RZ,EX9.0,';')", " -0X8.P-3;"}, + {"(RC,EX9.0,';')", " -0X9.P-3;"}, }}, }; @@ -775,11 +818,11 @@ TEST(IOApiTests, FormatIntegerValues) { } //------------------------------------------------------------------------------ -/// Tests for input formatting real values +/// Tests for input editing real values //------------------------------------------------------------------------------ // Ensure double input values correctly map to raw uint64 values -TEST(IOApiTests, FormatDoubleInputValues) { +TEST(IOApiTests, EditDoubleInputValues) { using TestCaseTy = std::tuple; static const std::vector testCases{ {"(F18.0)", " 0", 0x0}, @@ -806,6 +849,21 @@ TEST(IOApiTests, FormatDoubleInputValues) { {"(BZ,F18.0)", " . ", 0x0}, {"(BZ,F18.0)", " . e +1 ", 0x0}, {"(DC,F18.0)", " 12,5", 0x4029000000000000}, + {"(EX22.0)", "0X0P0 ", 0x0}, // +0. + {"(EX22.0)", "-0X0P0 ", 0x8000000000000000}, // -0. + {"(EX22.0)", "0X.8P1 ", 0x3ff0000000000000}, // 1.0 + {"(EX22.0)", "0X8.P-3 ", 0x3ff0000000000000}, // 1.0 + {"(EX22.0)", "0X.1P4 ", 0x3ff0000000000000}, // 1.0 + {"(EX22.0)", "0X10.P-4 ", 0x3ff0000000000000}, // 1.0 + {"(EX22.0)", "0X8.00P-3 ", 0x3ff0000000000000}, // 1.0 + {"(EX22.0)", "0X80.0P-6 ", 0x4000000000000000}, // 2.0 + {"(EX22.0)", "0XC.CCCCCCCCCCCDP-7 ", 0x3fb999999999999a}, // 0.1 + {"(EX22.0)", "0X.8P-1021 ", 0x0010000000000000}, // min normal + {"(EX22.0)", "0X.8P-1022 ", 0x0008000000000000}, // subnormal + {"(EX22.0)", "0X.8P-1073 ", 0x0000000000000001}, // min subn. 
+ {"(EX22.0)", "0X.FFFFFFFFFFFFF8P1024", 0x7fefffffffffffff}, // max finite + {"(EX22.0)", "0X.8P1025 ", 0x7ff0000000000000}, // +Inf + {"(EX22.0)", "-0X.8P1025 ", 0xfff0000000000000}, // -Inf }; for (auto const &[format, data, want] : testCases) { auto cookie{IONAME(BeginInternalFormattedInput)( From e200b0e4a7b5447052698397939c80ee3b0ebda9 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 14:15:40 -0700 Subject: [PATCH 267/720] [flang] Submodule names can clash only with submodule names (#67361) Name resolution creates symbols for submodules in their parents' scopes. This can lead to bogus errors about name clashes between submodule names and other entities in the parents' scopes. Create symbols for submodules but do not add them to a scope's dictionary. --- flang/lib/Semantics/mod-file.cpp | 43 +++++++++++++++++++-------- flang/lib/Semantics/resolve-names.cpp | 6 +++- flang/test/Semantics/modproc01.f90 | 4 ++- flang/test/Semantics/modproc02.f90 | 1 - 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index cee267a894ffd..8684eb1fbd332 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -1183,30 +1183,49 @@ Scope *ModFileReader::Read(const SourceName &name, } Scope &topScope{isIntrinsic.value_or(false) ? 
context_.intrinsicModulesScope() : context_.globalScope()}; - if (!ancestor) { + Symbol *moduleSymbol{nullptr}; + if (!ancestor) { // module, not submodule parentScope = &topScope; + auto pair{parentScope->try_emplace(name, UnknownDetails{})}; + if (!pair.second) { + return nullptr; + } + moduleSymbol = &*pair.first->second; + moduleSymbol->set(Symbol::Flag::ModFile); } else if (std::optional parent{GetSubmoduleParent(parseTree)}) { + // submodule with submodule parent parentScope = Read(*parent, false /*not intrinsic*/, ancestor, silent); } else { + // submodule with module parent parentScope = ancestor; } - auto pair{parentScope->try_emplace(name, UnknownDetails{})}; - if (!pair.second) { - return nullptr; - } // Process declarations from the module file - Symbol &modSymbol{*pair.first->second}; - modSymbol.set(Symbol::Flag::ModFile); bool wasInModuleFile{context_.foldingContext().inModuleFile()}; context_.foldingContext().set_inModuleFile(true); ResolveNames(context_, parseTree, topScope); context_.foldingContext().set_inModuleFile(wasInModuleFile); - CHECK(modSymbol.has()); - CHECK(modSymbol.test(Symbol::Flag::ModFile)); - if (isIntrinsic.value_or(false)) { - modSymbol.attrs().set(Attr::INTRINSIC); + if (!moduleSymbol) { + // Submodule symbols' storage are owned by their parents' scopes, + // but their names are not in their parents' dictionaries -- we + // don't want to report bogus errors about clashes between submodule + // names and other objects in the parent scopes. 
+ if (Scope * submoduleScope{ancestor->FindSubmodule(name)}) { + moduleSymbol = submoduleScope->symbol(); + if (moduleSymbol) { + moduleSymbol->set(Symbol::Flag::ModFile); + } + } + } + if (moduleSymbol) { + CHECK(moduleSymbol->has()); + CHECK(moduleSymbol->test(Symbol::Flag::ModFile)); + if (isIntrinsic.value_or(false)) { + moduleSymbol->attrs().set(Attr::INTRINSIC); + } + return moduleSymbol->scope(); + } else { + return nullptr; } - return modSymbol.scope(); } parser::Message &ModFileReader::Say(const SourceName &name, diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 40f5ab9eb6e27..b4deac9cf5ccd 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -3230,7 +3230,11 @@ bool ModuleVisitor::BeginSubmodule( } void ModuleVisitor::BeginModule(const parser::Name &name, bool isSubmodule) { - auto &symbol{MakeSymbol(name, ModuleDetails{isSubmodule})}; + // Submodule symbols are not visible in their parents' scopes. + Symbol &symbol{isSubmodule ? 
Resolve(name, + currScope().MakeSymbol(name.source, Attrs{}, + ModuleDetails{true})) + : MakeSymbol(name, ModuleDetails{false})}; auto &details{symbol.get()}; PushScope(Scope::Kind::Module, &symbol); details.set_scope(&currScope()); diff --git a/flang/test/Semantics/modproc01.f90 b/flang/test/Semantics/modproc01.f90 index c7d05783335e6..5652e15750c7e 100644 --- a/flang/test/Semantics/modproc01.f90 +++ b/flang/test/Semantics/modproc01.f90 @@ -22,11 +22,12 @@ module subroutine ms(f) procedure(mf) :: f end subroutine end interface + integer sm end module !CHECK: mf, MODULE, PUBLIC (Function): Subprogram isInterface result:TYPE(pdt2(k2=2_4,l2=n)) res (INTEGER(4) n,CHARACTER(n,1) str,TYPE(pdt1(k1=1_4,l1=n)) x1) !CHECK: pdt1, PUBLIC: DerivedType components: a1 !CHECK: pdt2, PUBLIC: DerivedType components: j2,a2 -!CHECK: sm: Module (m) +!CHECK: sm, PUBLIC size=4 offset=0: ObjectEntity type: INTEGER(4) !CHECK: DerivedType scope: pdt1 !CHECK: a1, ALLOCATABLE: ObjectEntity type: TYPE(pdt2(int(k1,kind=4),int(l1,kind=4))) !CHECK: k1: TypeParam type:INTEGER(4) Kind @@ -128,6 +129,7 @@ program test !CHECK: mf, MODULE (Function): Use from mf in m !CHECK: pdt1: Use from pdt1 in m !CHECK: pdt2: Use from pdt2 in m +!CHECK: sm: Use from sm in m !CHECK: x size=88 offset=0: ObjectEntity type: TYPE(pdt2(k2=2_4,l2=3_4)) !CHECK: DerivedType scope: size=88 alignment=8 instantiation of pdt2(k2=2_4,l2=3_4) !CHECK: a2 size=80 offset=8: ObjectEntity type: TYPE(pdt1(k1=2_4,l1=3_4)) shape: 1_8:2_8 diff --git a/flang/test/Semantics/modproc02.f90 b/flang/test/Semantics/modproc02.f90 index 229ef72e6bcf0..f47f473f081d2 100644 --- a/flang/test/Semantics/modproc02.f90 +++ b/flang/test/Semantics/modproc02.f90 @@ -16,7 +16,6 @@ module subroutine s(x) ! 
implicitly typed !CHECK: Module scope: m size=0 alignment=1 sourceRange=63 bytes !CHECK: s, MODULE, PUBLIC (Subroutine): Subprogram isInterface (REAL(4) x) -!CHECK: sm: Module (m) !CHECK: Subprogram scope: s size=4 alignment=4 sourceRange=26 bytes !CHECK: s (Subroutine): HostAssoc !CHECK: x (Implicit) size=4 offset=0: ObjectEntity dummy type: REAL(4) From e6e62efa880e7afe8a054f24857d1b64b8567767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Mon, 16 Oct 2023 23:23:45 +0200 Subject: [PATCH 268/720] [RS4GC] Copy argument attributes from call to statepoint (#68475) The current implementation completely ignores argument attributes on calls, discarding them completely when creating a statepoint from a call instruction. This is problematic in some scenarios as the argument attributes affect the ABI of the call, leading to undefined behavior if called with the wrong ABI attributes. Note that this cannot be solved either by just having the function declaration annotated with the right parameter attributes as the call might be indirect, therefore requiring them to be present on the arguments. This PR simply copies all parameter attributes over from the original call to the created statepoint. Note that some argument attributes become invalid after the lowering as they imply memory effects that no longer hold with the statepoints. These do not need to be explicitly handled in this PR as they are removed by the `stripNonValidDataFromBody`. 
--- .../Scalar/RewriteStatepointsForGC.cpp | 47 +++++++++++++------ .../call-argument-attributes.ll | 42 +++++++++++++++++ 2 files changed, 74 insertions(+), 15 deletions(-) create mode 100644 llvm/test/Transforms/RewriteStatepointsForGC/call-argument-attributes.ll diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index d2984cd829a9c..06c81f53de706 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -1422,14 +1423,15 @@ static constexpr Attribute::AttrKind FnAttrsToStrip[] = {Attribute::Memory, Attribute::NoSync, Attribute::NoFree}; // Create new attribute set containing only attributes which can be transferred -// from original call to the safepoint. -static AttributeList legalizeCallAttributes(LLVMContext &Ctx, - AttributeList OrigAL, +// from the original call to the safepoint. +static AttributeList legalizeCallAttributes(CallBase *Call, bool IsMemIntrinsic, AttributeList StatepointAL) { + AttributeList OrigAL = Call->getAttributes(); if (OrigAL.isEmpty()) return StatepointAL; // Remove the readonly, readnone, and statepoint function attributes. 
+ LLVMContext &Ctx = Call->getContext(); AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs()); for (auto Attr : FnAttrsToStrip) FnAttrs.removeAttribute(Attr); @@ -1439,8 +1441,24 @@ static AttributeList legalizeCallAttributes(LLVMContext &Ctx, FnAttrs.removeAttribute(A); } - // Just skip parameter and return attributes for now - return StatepointAL.addFnAttributes(Ctx, FnAttrs); + StatepointAL = StatepointAL.addFnAttributes(Ctx, FnAttrs); + + // The memory intrinsics do not have a 1:1 correspondence of the original + // call arguments to the produced statepoint. Do not transfer the argument + // attributes to avoid putting them on incorrect arguments. + if (IsMemIntrinsic) + return StatepointAL; + + // Attach the argument attributes from the original call at the corresponding + // arguments in the statepoint. Note that any argument attributes that are + // invalid after lowering are stripped in stripNonValidDataFromBody. + for (unsigned I : llvm::seq(Call->arg_size())) + StatepointAL = StatepointAL.addParamAttributes( + Ctx, GCStatepointInst::CallArgsBeginPos + I, + AttrBuilder(Ctx, OrigAL.getParamAttrs(I))); + + // Return attributes are later attached to the gc.result intrinsic. + return StatepointAL; } /// Helper function to place all gc relocates necessary for the given @@ -1630,6 +1648,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ // with a return value, we lower then as never returning calls to // __llvm_deoptimize that are followed by unreachable to get better codegen. 
bool IsDeoptimize = false; + bool IsMemIntrinsic = false; StatepointDirectives SD = parseStatepointDirectivesFromAttrs(Call->getAttributes()); @@ -1670,6 +1689,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ IsDeoptimize = true; } else if (IID == Intrinsic::memcpy_element_unordered_atomic || IID == Intrinsic::memmove_element_unordered_atomic) { + IsMemIntrinsic = true; + // Unordered atomic memcpy and memmove intrinsics which are not explicitly // marked as "gc-leaf-function" should be lowered in a GC parseable way. // Specifically, these calls should be lowered to the @@ -1785,12 +1806,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ SPCall->setTailCallKind(CI->getTailCallKind()); SPCall->setCallingConv(CI->getCallingConv()); - // Currently we will fail on parameter attributes and on certain - // function attributes. In case if we can handle this set of attributes - - // set up function attrs directly on statepoint and return attrs later for + // Set up function attrs directly on statepoint and return attrs later for // gc_result intrinsic. - SPCall->setAttributes(legalizeCallAttributes( - CI->getContext(), CI->getAttributes(), SPCall->getAttributes())); + SPCall->setAttributes( + legalizeCallAttributes(CI, IsMemIntrinsic, SPCall->getAttributes())); Token = cast(SPCall); @@ -1812,12 +1831,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ SPInvoke->setCallingConv(II->getCallingConv()); - // Currently we will fail on parameter attributes and on certain - // function attributes. In case if we can handle this set of attributes - - // set up function attrs directly on statepoint and return attrs later for + // Set up function attrs directly on statepoint and return attrs later for // gc_result intrinsic. 
- SPInvoke->setAttributes(legalizeCallAttributes( - II->getContext(), II->getAttributes(), SPInvoke->getAttributes())); + SPInvoke->setAttributes( + legalizeCallAttributes(II, IsMemIntrinsic, SPInvoke->getAttributes())); Token = cast(SPInvoke); diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/call-argument-attributes.ll b/llvm/test/Transforms/RewriteStatepointsForGC/call-argument-attributes.ll new file mode 100644 index 0000000000000..4a7088f95329f --- /dev/null +++ b/llvm/test/Transforms/RewriteStatepointsForGC/call-argument-attributes.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -passes=rewrite-statepoints-for-gc -S | FileCheck %s + +declare i8 @callee(ptr, i8, float, ptr) + +define i8 @test(ptr %arg) gc "statepoint-example" { +; CHECK-LABEL: define i8 @test( +; CHECK-SAME: ptr [[ARG:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i8 (ptr, i8, float, ptr)) @callee, i32 4, i32 0, ptr nocapture sret({ i64, i64 }) align 8 null, i8 signext 8, float inreg 1.000000e+00, ptr [[ARG]], i32 0, i32 0) +; CHECK-NEXT: [[R1:%.*]] = call zeroext i8 @llvm.experimental.gc.result.i8(token [[STATEPOINT_TOKEN]]) +; CHECK-NEXT: ret i8 [[R1]] +; + %r = call zeroext i8 @callee(ptr sret({i64, i64}) noalias align 8 nocapture null, i8 signext 8, float inreg 1.0, ptr writeonly %arg) + ret i8 %r +} + +declare i32 @personality_function() + +define i8 @test_invoke(ptr %arg) gc "statepoint-example" personality ptr @personality_function { +; CHECK-LABEL: define i8 @test_invoke( +; CHECK-SAME: ptr [[ARG:%.*]]) gc "statepoint-example" personality ptr @personality_function { +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = invoke token (i64, i32, ptr, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i8 (ptr, i8, float, ptr)) @callee, i32 4, i32 0, ptr nocapture sret({ i64, i64 }) align 8 null, i8 signext 8, float inreg 1.000000e+00, ptr [[ARG]], i32 0, i32 0) +; CHECK-NEXT: to label [[NORMAL_RETURN:%.*]] unwind label [[EXCEPTIONAL_RETURN:%.*]] +; CHECK: normal_return: +; CHECK-NEXT: [[R1:%.*]] = call zeroext i8 @llvm.experimental.gc.result.i8(token [[STATEPOINT_TOKEN]]) +; CHECK-NEXT: ret i8 [[R1]] +; CHECK: exceptional_return: +; CHECK-NEXT: [[LANDING_PAD4:%.*]] = landingpad token +; CHECK-NEXT: cleanup +; CHECK-NEXT: ret i8 0 +; + %r = invoke zeroext i8 @callee(ptr sret({i64, i64}) noalias align 8 nocapture null, i8 signext 8, float inreg 1.0, ptr writeonly %arg) + to label %normal_return unwind label %exceptional_return + +normal_return: + ret i8 %r + +exceptional_return: + %landing_pad4 = landingpad token + cleanup + ret i8 0 +} From 301a0dba56e176e3f236fc069405e3b929a76c94 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 14:29:40 -0700 Subject: [PATCH 269/720] [flang][runtime] Better non-repeatable RANDOM_INIT() (#67363) Use a higher-frequency clock base when initializing the pseudo-random number generator to implement CALL RANDOM_INIT(REPEATABLE=.FALSE.) 
--- flang/runtime/random.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/flang/runtime/random.cpp b/flang/runtime/random.cpp index b7175d6b63c35..8b00cfd1cac19 100644 --- a/flang/runtime/random.cpp +++ b/flang/runtime/random.cpp @@ -20,10 +20,10 @@ #include #include #include -#include #include #include #include +#include namespace Fortran::runtime { @@ -100,7 +100,13 @@ void RTNAME(RandomInit)(bool repeatable, bool /*image_distinct*/) { if (repeatable) { generator.seed(0); } else { - generator.seed(std::time(nullptr)); +#ifdef CLOCK_REALTIME + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + generator.seed(ts.tv_sec & ts.tv_nsec); +#else + generator.seed(time(nullptr)); +#endif } } } From 5f4ed780d348c810a7d4c1dd9354abf79094364b Mon Sep 17 00:00:00 2001 From: Emilia Kond Date: Tue, 17 Oct 2023 00:38:33 +0300 Subject: [PATCH 270/720] [clang-format] Allow default values for template parameters in lambda (#69052) Previously, upon encountering an equals sign while parsing a lambda in the UnwrappedLineParser, it would fall through and fail. This caused any lambda template with a default argument for a template parameter to be annotated as an ArraySubscriptLSquare. This patch allows equals signs in the UnwrappedLineParser if we're currently in a template parameter list. This resolved a FIXME that was in the lambda parsing function. This patch seems deceptively easy, it's likely it doesn't solve the FIXME entirely, or causes other issues (the FIXME itself mentions something about Objective-C, which I cannot comment about). However this patch is sufficient to fix the below issue. 
Fixes https://github.com/llvm/llvm-project/issues/68913 --------- Co-authored-by: Owen Pan --- clang/lib/Format/UnwrappedLineParser.cpp | 8 ++-- clang/unittests/Format/TokenAnnotatorTest.cpp | 38 +++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 82a812fc8bcc6..708b70489a114 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2226,9 +2226,6 @@ bool UnwrappedLineParser::tryToParseLambda() { // followed by an `a->b` expression, such as: // ([obj func:arg] + a->b) // Otherwise the code below would parse as a lambda. - // - // FIXME: This heuristic is incorrect for C++20 generic lambdas with - // explicit template lists: [](U &&u){} case tok::plus: case tok::minus: case tok::exclaim: @@ -2268,6 +2265,11 @@ bool UnwrappedLineParser::tryToParseLambda() { parseRequiresClause(RequiresToken); break; } + case tok::equal: + if (!InTemplateParameterList) + return true; + nextToken(); + break; default: return true; } diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index e5cc3ed3686b3..2d04694799669 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1620,6 +1620,44 @@ TEST_F(TokenAnnotatorTest, UnderstandsLambdas) { EXPECT_TOKEN(Tokens[15], tok::kw_requires, TT_RequiresClause); EXPECT_TRUE(Tokens[19]->ClosesRequiresClause); EXPECT_TOKEN(Tokens[20], tok::l_brace, TT_LambdaLBrace); + + Tokens = annotate("[] (T t) {}"); + ASSERT_EQ(Tokens.size(), 15u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare); + EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener); + EXPECT_TOKEN(Tokens[7], tok::greater, TT_TemplateCloser); + EXPECT_TOKEN(Tokens[12], tok::l_brace, TT_LambdaLBrace); + + Tokens = annotate("[] (T t) {}"); + ASSERT_EQ(Tokens.size(), 15u) << Tokens; + 
EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare); + EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener); + EXPECT_TOKEN(Tokens[7], tok::greater, TT_TemplateCloser); + EXPECT_TOKEN(Tokens[12], tok::l_brace, TT_LambdaLBrace); + + Tokens = annotate("[] (T t) {}"); + ASSERT_EQ(Tokens.size(), 15u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare); + EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener); + EXPECT_TOKEN(Tokens[7], tok::greater, TT_TemplateCloser); + EXPECT_TOKEN(Tokens[12], tok::l_brace, TT_LambdaLBrace); + + Tokens = annotate("[] (T&& t) {}"); + ASSERT_EQ(Tokens.size(), 18u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare); + EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener); + EXPECT_TOKEN(Tokens[7], tok::ampamp, TT_BinaryOperator); + EXPECT_TOKEN(Tokens[9], tok::greater, TT_TemplateCloser); + EXPECT_TOKEN(Tokens[12], tok::ampamp, TT_PointerOrReference); + EXPECT_TOKEN(Tokens[15], tok::l_brace, TT_LambdaLBrace); + + Tokens = annotate("[] requires Foo (T t) {}"); + ASSERT_EQ(Tokens.size(), 20u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare); + EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener); + EXPECT_TOKEN(Tokens[7], tok::greater, TT_TemplateCloser); + EXPECT_TOKEN(Tokens[8], tok::kw_requires, TT_RequiresClause); + EXPECT_TOKEN(Tokens[17], tok::l_brace, TT_LambdaLBrace); } TEST_F(TokenAnnotatorTest, UnderstandsFunctionAnnotations) { From 59f69a38ad375bc2ae53f9c6e0331eb222247957 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 14:45:42 -0700 Subject: [PATCH 271/720] [flang] Ensure component attributes affect characteristics (#67465) A recent fix causes the TypeAndShape::Characterize() member function templates for general expressions and designators to avoid using the Characterize() member function for Symbols when the argument is a whole component. 
This caused the corank of a component to no longer be reflected in the returned TypeAndShape characteristics. Fix the regression. --- .../include/flang/Evaluate/characteristics.h | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/flang/include/flang/Evaluate/characteristics.h b/flang/include/flang/Evaluate/characteristics.h index bcb1543203640..d685d250bf20b 100644 --- a/flang/include/flang/Evaluate/characteristics.h +++ b/flang/include/flang/Evaluate/characteristics.h @@ -81,18 +81,19 @@ class TypeAndShape { bool operator!=(const TypeAndShape &that) const { return !(*this == that); } static std::optional Characterize( - const semantics::Symbol &, FoldingContext &, bool invariantOnly = false); + const semantics::Symbol &, FoldingContext &, bool invariantOnly = true); static std::optional Characterize( const semantics::DeclTypeSpec &, FoldingContext &, - bool invariantOnly = false); + bool invariantOnly = true); static std::optional Characterize( - const ActualArgument &, FoldingContext &, bool invariantOnly = false); + const ActualArgument &, FoldingContext &, bool invariantOnly = true); // General case for Expr, &c. 
template static std::optional Characterize( - const A &x, FoldingContext &context, bool invariantOnly = false) { - if (const auto *symbol{UnwrapWholeSymbolDataRef(x)}) { + const A &x, FoldingContext &context, bool invariantOnly = true) { + const auto *symbol{UnwrapWholeSymbolOrComponentDataRef(x)}; + if (symbol && !symbol->owner().IsDerivedType()) { // Whole variable if (auto result{Characterize(*symbol, context, invariantOnly)}) { return result; } @@ -106,6 +107,9 @@ class TypeAndShape { } } } + if (symbol) { // component + result.AcquireAttrs(*symbol); + } return std::move(result.Rewrite(context)); } return std::nullopt; @@ -116,15 +120,21 @@ class TypeAndShape { static std::optional Characterize( const Designator> &x, FoldingContext &context, bool invariantOnly = true) { - if (const auto *symbol{UnwrapWholeSymbolDataRef(x)}) { + const auto *symbol{UnwrapWholeSymbolOrComponentDataRef(x)}; + if (symbol && !symbol->owner().IsDerivedType()) { // Whole variable if (auto result{Characterize(*symbol, context, invariantOnly)}) { return result; } } if (auto type{x.GetType()}) { TypeAndShape result{*type, GetShape(context, x, invariantOnly)}; - if (auto length{x.LEN()}) { - result.set_LEN(std::move(*length)); + if (type->category() == TypeCategory::Character) { + if (auto length{x.LEN()}) { + result.set_LEN(std::move(*length)); + } + } + if (symbol) { // component + result.AcquireAttrs(*symbol); } return std::move(result.Rewrite(context)); } @@ -133,7 +143,7 @@ class TypeAndShape { template static std::optional Characterize(const std::optional &x, - FoldingContext &context, bool invariantOnly = false) { + FoldingContext &context, bool invariantOnly = true) { if (x) { return Characterize(*x, context, invariantOnly); } else { @@ -142,7 +152,7 @@ class TypeAndShape { } template static std::optional Characterize( - A *ptr, FoldingContext &context, bool invariantOnly = false) { + A *ptr, FoldingContext &context, bool invariantOnly = true) { if (ptr) { return 
Characterize(std::as_const(*ptr), context, invariantOnly); } else { From 233c3e6c53a561296f3ae5c5ec99e9a527f856d8 Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Mon, 16 Oct 2023 14:45:57 -0700 Subject: [PATCH 272/720] [mlir][sparse] remove sparse2sparse path in library (#69247) This cleans up all external entry points that will have to deal with non-permutations, making any subsequent refactoring much more local to the lib files. --- .../mlir/Dialect/SparseTensor/IR/Enums.h | 1 - .../ExecutionEngine/SparseTensor/Storage.h | 264 +----------------- .../ExecutionEngine/SparseTensorRuntime.h | 1 - .../SparseTensor/CMakeLists.txt | 1 - mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp | 79 ------ .../ExecutionEngine/SparseTensor/Storage.cpp | 13 +- .../ExecutionEngine/SparseTensorRuntime.cpp | 7 - .../llvm-project-overlay/mlir/BUILD.bazel | 1 - 8 files changed, 3 insertions(+), 364 deletions(-) delete mode 100644 mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index 0caf83a63b531..08887abcd0f10 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -145,7 +145,6 @@ enum class Action : uint32_t { kEmpty = 0, kEmptyForward = 1, kFromCOO = 2, - kSparseToSparse = 3, kFromReader = 4, kToCOO = 5, kPack = 7, diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index c5be3d1acc337..bafc9baa7edde 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -12,7 +12,6 @@ // * `SparseTensorStorage` // * `SparseTensorEnumeratorBase` // * `SparseTensorEnumerator` -// * `SparseTensorNNZ` // //===----------------------------------------------------------------------===// @@ -26,14 +25,6 @@ #include 
"mlir/ExecutionEngine/SparseTensor/ErrorHandling.h" #include "mlir/ExecutionEngine/SparseTensor/MapRef.h" -#define ASSERT_COMPRESSED_OR_SINGLETON_LVL(l) \ - do { \ - const DimLevelType dlt = getLvlType(l); \ - (void)dlt; \ - assert((isCompressedDLT(dlt) || isSingletonDLT(dlt)) && \ - "Level is neither compressed nor singleton"); \ - } while (false) - namespace mlir { namespace sparse_tensor { @@ -152,18 +143,6 @@ class SparseTensorStorageBase { // TODO: REMOVE THIS const std::vector &getLvl2Dim() const { return lvl2dimVec; } - /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument - /// is the out-parameter for storing the newly allocated enumerator; - /// all other arguments are passed along to the `SparseTensorEnumerator` - /// ctor and must satisfy the preconditions/assertions thereof. -#define DECL_NEWENUMERATOR(VNAME, V) \ - virtual void newEnumerator(SparseTensorEnumeratorBase **, uint64_t, \ - const uint64_t *, uint64_t, const uint64_t *) \ - const; - MLIR_SPARSETENSOR_FOREVERY_V(DECL_NEWENUMERATOR) -#undef DECL_NEWENUMERATOR - /// Gets positions-overhead storage for the given level. #define DECL_GETPOSITIONS(PNAME, P) \ virtual void getPositions(std::vector

**, uint64_t); @@ -312,27 +291,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim, SparseTensorCOO &lvlCOO); - /// Allocates a new sparse tensor and initializes it with the contents - /// of another sparse tensor. - // - // TODO: The `dimRank` and `dimShape` arguments are only used for - // verifying that the source tensor has the expected shape. So if we - // wanted to skip that verification, then we could remove those arguments. - // Alternatively, if we required the `dimShape` to be "sizes" instead, - // then that would remove any constraints on `source.getDimSizes()` - // (other than compatibility with `src2lvl`) as well as removing the - // requirement that `src2lvl` be the inverse of `lvl2dim`. Which would - // enable this factory to be used for performing a much larger class of - // transformations (which can already be handled by the `SparseTensorNNZ` - // implementation). - static SparseTensorStorage * - newFromSparseTensor(uint64_t dimRank, const uint64_t *dimShape, - uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, - const uint64_t *src2lvl, // FIXME: dim2lvl, - const uint64_t *lvl2dim, uint64_t srcRank, - const SparseTensorStorageBase &source); - /// Allocates a new sparse tensor and initialize it with the data stored level /// buffers directly. static SparseTensorStorage *packFromLvlBuffers( @@ -361,7 +319,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Returns coordinate at given position. uint64_t getCrd(uint64_t lvl, uint64_t pos) const final { - ASSERT_COMPRESSED_OR_SINGLETON_LVL(lvl); + assert(isCompressedDLT(getLvlType(lvl)) || isSingletonDLT(getLvlType(lvl))); assert(pos < coordinates[lvl].size()); return coordinates[lvl][pos]; // Converts the stored `C` into `uint64_t`. 
} @@ -453,17 +411,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase { endPath(0); } - /// Allocates a new enumerator for this class's `` types and - /// erase the `` parts from the type. Callers must make sure to - /// delete the enumerator when they're done with it. - void newEnumerator(SparseTensorEnumeratorBase **out, uint64_t trgRank, - const uint64_t *trgSizes, uint64_t srcRank, - const uint64_t *src2trg) const final { - assert(out && "Received nullptr for out parameter"); - *out = new SparseTensorEnumerator(*this, trgRank, trgSizes, - srcRank, src2trg); - } - /// Allocates a new COO object and initializes it with the contents /// of this tensor under the given mapping from the `getDimSizes()` /// coordinate-space to the `trgSizes` coordinate-space. Callers must @@ -472,7 +419,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase { uint64_t srcRank, const uint64_t *src2trg, // FIXME: dim2lvl const uint64_t *lvl2dim) const { - // We inline `newEnumerator` to avoid virtual dispatch and allocation. // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); @@ -584,7 +530,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// does not check that `crd` is semantically valid (i.e., in bounds /// for `dimSizes[lvl]` and not elsewhere occurring in the same segment). void writeCrd(uint64_t lvl, uint64_t pos, uint64_t crd) { - ASSERT_COMPRESSED_OR_SINGLETON_LVL(lvl); + assert(isCompressedDLT(getLvlType(lvl)) || isSingletonDLT(getLvlType(lvl))); // Subscript assignment to `std::vector` requires that the `pos`-th // entry has been initialized; thus we must be sure to check `size()` // here, instead of `capacity()` as would be ideal. 
@@ -735,8 +681,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *lvlCOO; // COO used during forwarding }; -#undef ASSERT_COMPRESSED_OR_SINGLETON_LVL - //===----------------------------------------------------------------------===// // // SparseTensorEnumerator @@ -905,83 +849,6 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { } }; -//===----------------------------------------------------------------------===// -// -// SparseTensorNNZ -// -//===----------------------------------------------------------------------===// - -/// Statistics regarding the number of nonzero subtensors in -/// a source tensor, for direct sparse=>sparse conversion a la -/// . -/// -/// N.B., this class stores references to the parameters passed to -/// the constructor; thus, objects of this class must not outlive -/// those parameters. -/// -/// This class does not have the "dimension" vs "level" distinction, but -/// since it is used for initializing the levels of a `SparseTensorStorage` -/// object, we use the "level" name throughout for the sake of consistency. -class SparseTensorNNZ final { -public: - /// Allocates the statistics structure for the desired target-tensor - /// level structure (i.e., sizes and types). This constructor does not - /// actually populate the statistics, however; for that see `initialize`. - /// - /// Precondition: `lvlSizes` must not contain zeros. - /// Asserts: `lvlSizes.size() == lvlTypes.size()`. - SparseTensorNNZ(const std::vector &lvlSizes, - const std::vector &lvlTypes); - - // We disallow copying to help avoid leaking the stored references. - SparseTensorNNZ(const SparseTensorNNZ &) = delete; - SparseTensorNNZ &operator=(const SparseTensorNNZ &) = delete; - - /// Gets the target-tensor's level-rank. - uint64_t getLvlRank() const { return lvlSizes.size(); } - - /// Enumerates the source tensor to fill in the statistics. 
- /// The enumerator should already incorporate the mapping from - /// the source tensor-dimensions to the target storage-levels. - /// - /// Asserts: - /// * `enumerator.getTrgRank() == getLvlRank()`. - /// * `enumerator.getTrgSizes() == lvlSizes`. - template - void initialize(SparseTensorEnumeratorBase &enumerator) { - assert(enumerator.getTrgRank() == getLvlRank() && "Tensor rank mismatch"); - assert(enumerator.getTrgSizes() == lvlSizes && "Tensor size mismatch"); - enumerator.forallElements( - [this](const std::vector &lvlCoords, V) { add(lvlCoords); }); - } - - /// The type of callback functions which receive an nnz-statistic. - using NNZConsumer = const std::function &; - - /// Lexicographically enumerates all coordinates for levels strictly - /// less than `stopLvl`, and passes their nnz statistic to the callback. - /// Since our use-case only requires the statistic not the coordinates - /// themselves, we do not bother to construct those coordinates. - void forallCoords(uint64_t stopLvl, NNZConsumer yield) const; - -private: - /// Adds a new element (i.e., increment its statistics). We use - /// a method rather than inlining into the lambda in `initialize`, - /// to avoid spurious templating over `V`. And this method is private - /// to avoid needing to re-assert validity of `lvlCoords` (which is - /// guaranteed by `forallElements`). - void add(const std::vector &lvlCoords); - - /// Recursive component of the public `forallCoords`. - void forallCoords(NNZConsumer yield, uint64_t stopLvl, uint64_t parentPos, - uint64_t l) const; - - // All of these are in the target storage-order. 
- const std::vector &lvlSizes; - const std::vector &lvlTypes; - std::vector> nnz; -}; - //===----------------------------------------------------------------------===// // // SparseTensorStorage Factories @@ -1025,33 +892,6 @@ SparseTensorStorage *SparseTensorStorage::newFromCOO( lvlTypes, dim2lvl, lvl2dim, lvlCOO); } -template -SparseTensorStorage *SparseTensorStorage::newFromSparseTensor( - uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, - const uint64_t *lvlSizes, const DimLevelType *lvlTypes, - const uint64_t *src2lvl, // dim2lvl - const uint64_t *lvl2dim, uint64_t srcRank, - const SparseTensorStorageBase &source) { - // Verify that the `source` dimensions match the expected `dimShape`. - assert(dimShape && "Got nullptr for dimension shape"); - assert(dimRank == source.getDimRank() && "Dimension-rank mismatch"); - const auto &dimSizes = source.getDimSizes(); -#ifndef NDEBUG - for (uint64_t d = 0; d < dimRank; ++d) { - const uint64_t sz = dimShape[d]; - assert((sz == 0 || sz == dimSizes[d]) && - "Dimension-sizes do not match expected shape"); - } -#endif - SparseTensorEnumeratorBase *lvlEnumerator; - source.newEnumerator(&lvlEnumerator, lvlRank, lvlSizes, srcRank, src2lvl); - auto *tensor = new SparseTensorStorage(dimRank, dimSizes.data(), - lvlRank, lvlTypes, src2lvl, - lvl2dim, *lvlEnumerator); - delete lvlEnumerator; - return tensor; -} - template SparseTensorStorage *SparseTensorStorage::packFromLvlBuffers( uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, @@ -1128,106 +968,6 @@ SparseTensorStorage::SparseTensorStorage( // NOLINT fromCOO(elements, 0, nse, 0); } -template -SparseTensorStorage::SparseTensorStorage( - uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, - const DimLevelType *lvlTypes, const uint64_t *dim2lvl, - const uint64_t *lvl2dim, SparseTensorEnumeratorBase &lvlEnumerator) - : SparseTensorStorage(dimRank, dimSizes, lvlRank, - lvlEnumerator.getTrgSizes().data(), lvlTypes, dim2lvl, - lvl2dim) { - 
assert(lvlRank == lvlEnumerator.getTrgRank() && "Level-rank mismatch"); - { - // Initialize the statistics structure. - SparseTensorNNZ nnz(getLvlSizes(), getLvlTypes()); - nnz.initialize(lvlEnumerator); - // Initialize "positions" overhead (and allocate "coordinates", "values"). - uint64_t parentSz = 1; // assembled-size of the `(l - 1)`-level. - for (uint64_t l = 0; l < lvlRank; ++l) { - const auto dlt = lvlTypes[l]; // Avoid redundant bounds checking. - if (isCompressedDLT(dlt)) { - positions[l].reserve(parentSz + 1); - positions[l].push_back(0); - uint64_t currentPos = 0; - nnz.forallCoords(l, [this, ¤tPos, l](uint64_t n) { - currentPos += n; - appendPos(l, currentPos); - }); - assert(positions[l].size() == parentSz + 1 && - "Final positions size doesn't match allocated size"); - // That assertion entails `assembledSize(parentSz, l)` - // is now in a valid state. That is, `positions[l][parentSz]` - // equals the present value of `currentPos`, which is the - // correct assembled-size for `coordinates[l]`. - } - // Update assembled-size for the next iteration. - parentSz = assembledSize(parentSz, l); - // Ideally we need only `coordinates[l].reserve(parentSz)`, however - // the `std::vector` implementation forces us to initialize it too. - // That is, in the yieldPos loop we need random-access assignment - // to `coordinates[l]`; however, `std::vector`'s subscript-assignment - // only allows assigning to already-initialized positions. - if (isCompressedDLT(dlt) || isSingletonDLT(dlt)) - coordinates[l].resize(parentSz, 0); - else - assert(isDenseDLT(dlt)); - } - values.resize(parentSz, 0); // Both allocate and zero-initialize. - } - // The yieldPos loop - lvlEnumerator.forallElements([this](const auto &lvlCoords, V val) { - uint64_t parentSz = 1, parentPos = 0; - for (uint64_t lvlRank = getLvlRank(), l = 0; l < lvlRank; ++l) { - const auto dlt = getLvlTypes()[l]; // Avoid redundant bounds checking. 
- if (isCompressedDLT(dlt)) { - // If `parentPos == parentSz` then it's valid as an array-lookup; - // however, it's semantically invalid here since that entry - // does not represent a segment of `coordinates[l]`. Moreover, that - // entry must be immutable for `assembledSize` to remain valid. - assert(parentPos < parentSz); - const uint64_t currentPos = positions[l][parentPos]; - // This increment won't overflow the `P` type, since it can't - // exceed the original value of `positions[l][parentPos+1]` - // which was already verified to be within bounds for `P` - // when it was written to the array. - positions[l][parentPos]++; - writeCrd(l, currentPos, lvlCoords[l]); - parentPos = currentPos; - } else if (isSingletonDLT(dlt)) { - writeCrd(l, parentPos, lvlCoords[l]); - // the new parentPos equals the old parentPos. - } else { // Dense level. - assert(isDenseDLT(dlt)); - parentPos = parentPos * getLvlSizes()[l] + lvlCoords[l]; - } - parentSz = assembledSize(parentSz, l); - } - assert(parentPos < values.size()); - values[parentPos] = val; - }); - // The finalizeYieldPos loop - for (uint64_t parentSz = 1, l = 0; l < lvlRank; ++l) { - const auto dlt = lvlTypes[l]; // Avoid redundant bounds checking. - if (isCompressedDLT(dlt)) { - assert(parentSz == positions[l].size() - 1 && - "Actual positions size doesn't match the expected size"); - // Can't check all of them, but at least we can check the last one. - assert(positions[l][parentSz - 1] == positions[l][parentSz] && - "Positions got corrupted"); - for (uint64_t n = 0; n < parentSz; ++n) { - const uint64_t parentPos = parentSz - n; - positions[l][parentPos] = positions[l][parentPos - 1]; - } - positions[l][0] = 0; - } else { - // Both dense and singleton are no-ops for the finalizeYieldPos loop. - // This assertion is for future-proofing. 
- assert((isDenseDLT(dlt) || isSingletonDLT(dlt))); - } - parentSz = assembledSize(parentSz, l); - } -} - template SparseTensorStorage::SparseTensorStorage( uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index a470afc2f0c8c..8955b79f09197 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -47,7 +47,6 @@ extern "C" { /// kEmpty - STS, empty /// kEmptyForward - STS, empty, with forwarding COO /// kFromCOO COO STS, copied from the COO source -/// kSparseToSparse STS STS, copied from the STS source /// kToCOO STS COO, copied from the STS source /// kPack buffers STS, from level buffers /// kSortCOOInPlace STS STS, sorted in place diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index c48af17b2d94b..15024b2475b91 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -8,7 +8,6 @@ add_mlir_library(MLIRSparseTensorRuntime File.cpp MapRef.cpp - NNZ.cpp Storage.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp deleted file mode 100644 index d3c3951c15468..0000000000000 --- a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//===- NNZ.cpp - NNZ-statistics for direct sparse2sparse conversion -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains method definitions for `SparseTensorNNZ`. 
-// -//===----------------------------------------------------------------------===// - -#include "mlir/ExecutionEngine/SparseTensor/Storage.h" - -using namespace mlir::sparse_tensor; - -SparseTensorNNZ::SparseTensorNNZ(const std::vector &lvlSizes, - const std::vector &lvlTypes) - : lvlSizes(lvlSizes), lvlTypes(lvlTypes), nnz(getLvlRank()) { - assert(lvlSizes.size() == lvlTypes.size() && "Rank mismatch"); - bool alreadyCompressed = false; - (void)alreadyCompressed; - uint64_t sz = 1; // the product of all `lvlSizes` strictly less than `l`. - for (uint64_t l = 0, lvlrank = getLvlRank(); l < lvlrank; ++l) { - const DimLevelType dlt = lvlTypes[l]; - if (isCompressedDLT(dlt)) { - if (alreadyCompressed) - MLIR_SPARSETENSOR_FATAL( - "Multiple compressed levels not currently supported"); - alreadyCompressed = true; - nnz[l].resize(sz, 0); // Both allocate and zero-initialize. - } else if (isDenseDLT(dlt)) { - if (alreadyCompressed) - MLIR_SPARSETENSOR_FATAL( - "Dense after compressed not currently supported"); - } else if (isSingletonDLT(dlt)) { - // Singleton after Compressed causes no problems for allocating - // `nnz` nor for the yieldPos loop. This remains true even - // when adding support for multiple compressed dimensions or - // for dense-after-compressed. 
- } else { - MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n", - static_cast(dlt)); - } - sz = detail::checkedMul(sz, lvlSizes[l]); - } -} - -void SparseTensorNNZ::forallCoords(uint64_t stopLvl, - SparseTensorNNZ::NNZConsumer yield) const { - assert(stopLvl < getLvlRank() && "Level out of bounds"); - assert(isCompressedDLT(lvlTypes[stopLvl]) && - "Cannot look up non-compressed levels"); - forallCoords(yield, stopLvl, 0, 0); -} - -void SparseTensorNNZ::add(const std::vector &lvlCoords) { - uint64_t parentPos = 0; - for (uint64_t l = 0, lvlrank = getLvlRank(); l < lvlrank; ++l) { - if (isCompressedDLT(lvlTypes[l])) - nnz[l][parentPos]++; - parentPos = parentPos * lvlSizes[l] + lvlCoords[l]; - } -} - -void SparseTensorNNZ::forallCoords(SparseTensorNNZ::NNZConsumer yield, - uint64_t stopLvl, uint64_t parentPos, - uint64_t l) const { - assert(l <= stopLvl); - if (l == stopLvl) { - assert(parentPos < nnz[l].size() && "Cursor is out of range"); - yield(nnz[l][parentPos]); - } else { - const uint64_t sz = lvlSizes[l]; - const uint64_t pstart = parentPos * sz; - for (uint64_t i = 0; i < sz; ++i) - forallCoords(yield, stopLvl, pstart + i, l + 1); - } -} diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp index 050dff2da1fa4..f5890ebb6f3ff 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp @@ -44,21 +44,10 @@ SparseTensorStorageBase::SparseTensorStorageBase( // NOLINT } } -// Helper macro for generating error messages when some -// `SparseTensorStorage` is cast to `SparseTensorStorageBase` -// and then the wrong "partial method specialization" is called. +// Helper macro for wrong "partial method specialization" errors. 
#define FATAL_PIV(NAME) \ MLIR_SPARSETENSOR_FATAL(" type mismatch for: " #NAME); -#define IMPL_NEWENUMERATOR(VNAME, V) \ - void SparseTensorStorageBase::newEnumerator( \ - SparseTensorEnumeratorBase **, uint64_t, const uint64_t *, uint64_t, \ - const uint64_t *) const { \ - FATAL_PIV("newEnumerator" #VNAME); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_NEWENUMERATOR) -#undef IMPL_NEWENUMERATOR - #define IMPL_GETPOSITIONS(PNAME, P) \ void SparseTensorStorageBase::getPositions(std::vector

**, uint64_t) { \ FATAL_PIV("getPositions" #PNAME); \ diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 74ab65c143d63..6a4c0f292c5f8 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -131,13 +131,6 @@ extern "C" { return SparseTensorStorage::newFromCOO( \ dimRank, dimSizes, lvlRank, lvlTypes, dim2lvl, lvl2dim, coo); \ } \ - case Action::kSparseToSparse: { \ - assert(ptr && "Received nullptr for SparseTensorStorage object"); \ - auto &tensor = *static_cast(ptr); \ - return SparseTensorStorage::newFromSparseTensor( \ - dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, \ - dimRank, tensor); \ - } \ case Action::kFromReader: { \ assert(ptr && "Received nullptr for SparseTensorReader object"); \ SparseTensorReader &reader = *static_cast(ptr); \ diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 63f9cdafce88b..09cf01e73ed8c 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -8795,7 +8795,6 @@ cc_library( srcs = [ "lib/ExecutionEngine/SparseTensor/File.cpp", "lib/ExecutionEngine/SparseTensor/MapRef.cpp", - "lib/ExecutionEngine/SparseTensor/NNZ.cpp", "lib/ExecutionEngine/SparseTensor/Storage.cpp", ], hdrs = [ From c007e0f66ee3f96467fd12f6200218fb4c38c2c9 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Mon, 16 Oct 2023 14:55:57 -0700 Subject: [PATCH 273/720] [hwasan][test] Fix regex so deep-recursion.c is unsupported on aarch64 targets (#69254) After 144c5b6d58803a2d4a0fe92a0fe331ff0347dc3b, we still see this test running in CI for aarch64-linux targets. This appears to be related to the triple being `aarch64-unknown-linux-gnu`, or similar. The bot link below includes 144c5b6d58803a2d4a0fe92a0fe331ff0347dc3b, and fails the deep-recursion.c test, which should have been disabled. 
https://luci-milo.appspot.com/ui/p/fuchsia/builders/toolchain.ci/clang-linux-arm64/b8767065085790662609/overview --- compiler-rt/test/hwasan/TestCases/deep-recursion.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/test/hwasan/TestCases/deep-recursion.c b/compiler-rt/test/hwasan/TestCases/deep-recursion.c index 39902d072a0d3..bf390d051d472 100644 --- a/compiler-rt/test/hwasan/TestCases/deep-recursion.c +++ b/compiler-rt/test/hwasan/TestCases/deep-recursion.c @@ -18,7 +18,7 @@ // XFAIL: target=x86_64{{.*}} // Flaky on AArch64 Linux, see https://github.com/llvm/llvm-project/issues/69221. -// UNSUPPORTED: target=aarch64-linux{{.*}} +// UNSUPPORTED: target=aarch64{{.*}} #include // At least -O1 is needed for this function to not have a stack frame on From dda46b2e795cb12bc6799e0508d67b4dc72a8469 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Mon, 16 Oct 2023 15:02:30 -0700 Subject: [PATCH 274/720] [docs] Add a new GlobalISel office hours session to the list. --- llvm/docs/GettingInvolved.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index 4ffdcfa2b6d8a..75e7608e1700f 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -318,6 +318,11 @@ don't find anyone present, chances are they happen to be off that day. - Monthly, 3rd Wednesday of the month at 8:30am Beijing time, for 30 minutes. - `MS Teams `__ - English, Chinese + * - Amara Emerson + - GlobalISel questions. + - Monthly, 4th Wednesday of the month at 9:30am PT, for 30 minutes. 
+ - `GoogleMeet Date: Mon, 16 Oct 2023 15:32:27 -0700 Subject: [PATCH 275/720] [docs] Fix google meet link --- llvm/docs/GettingInvolved.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index 75e7608e1700f..9ace5b7cbbdaa 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -321,7 +321,7 @@ don't find anyone present, chances are they happen to be off that day. * - Amara Emerson - GlobalISel questions. - Monthly, 4th Wednesday of the month at 9:30am PT, for 30 minutes. - - `GoogleMeet `__ - English From 39f4ec5854a1ca34c70343c3ed1648a6be5b6b82 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 15:40:13 -0700 Subject: [PATCH 276/720] [flang] Catch a dangerous ambiguity in standard Fortran (#67483) Fortran allows forward references to type names, which can lead to ambiguity when coupled with host association, as in: module m type ambiguous; integer n; end type contains subroutine s type(ambiguous), pointer :: variable type t type(ambiguous), pointer :: component end type type ambiguous; real x; end type end end Some other compilers resolve to a host association, some resolve to a forward reference. This compiler will now emit an error. --- flang/docs/Extensions.md | 15 +++++++++++++++ flang/lib/Semantics/resolve-names.cpp | 7 ++++++- flang/test/Semantics/resolve29.f90 | 23 +++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 480039911719c..373f18e1e2284 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -613,6 +613,21 @@ end module associated objects and do not elicit errors about improper redeclarations of implicitly typed entities. +* Standard Fortran allows forward references to derived types, which + can lead to ambiguity when combined with host association. 
+ Some Fortran compilers resolve the type name to the host type, + others to the forward-referenced local type; this compiler diagnoses + an error. +``` +module m + type ambiguous; integer n; end type + contains + subroutine s + type(ambiguous), pointer :: ptr + type ambiguous; real a; end type + end +end +``` ## De Facto Standard Features diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index b4deac9cf5ccd..90c14806afbf8 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -6429,6 +6429,11 @@ std::optional DeclarationVisitor::ResolveDerivedType( Say(name, "Derived type '%s' not found"_err_en_US); return std::nullopt; } + } else if (&DEREF(symbol).owner() != &outer && + !ultimate->has()) { + // Prevent a later declaration in this scope of a host-associated + // type name. + outer.add_importName(name.source); } if (CheckUseError(name)) { return std::nullopt; @@ -8096,7 +8101,7 @@ void ResolveNamesVisitor::CheckImport( const Symbol &ultimate{symbol->GetUltimate()}; if (&ultimate.owner() == &currScope()) { Say(location, "'%s' from host is not accessible"_err_en_US, name) - .Attach(symbol->name(), "'%s' is hidden by this entity"_en_US, + .Attach(symbol->name(), "'%s' is hidden by this entity"_because_en_US, symbol->name()); } } diff --git a/flang/test/Semantics/resolve29.f90 b/flang/test/Semantics/resolve29.f90 index ea4642c1b3ddc..3e6a8a0ba6976 100644 --- a/flang/test/Semantics/resolve29.f90 +++ b/flang/test/Semantics/resolve29.f90 @@ -9,6 +9,7 @@ subroutine s1(x) !ERROR: 't1' from host is not accessible import :: t1 type(t1) :: x + !BECAUSE: 't1' is hidden by this entity integer :: t1 end subroutine subroutine s2() @@ -24,6 +25,7 @@ subroutine s4(x, y) import, all type(t1) :: x type(t3) :: y + !BECAUSE: 't3' is hidden by this entity integer :: t3 end subroutine end interface @@ -41,6 +43,27 @@ subroutine s7() !ERROR: 's5' is an external procedure without the EXTERNAL attribute in a 
scope with IMPLICIT NONE(EXTERNAL) call s5() end + subroutine s8() + !This case is a dangerous ambiguity allowed by the standard. + !ERROR: 't1' from host is not accessible + type(t1), pointer :: p + !BECAUSE: 't1' is hidden by this entity + type t1 + integer n(2) + end type + end + subroutine s9() + !This case is a dangerous ambiguity allowed by the standard. + type t2 + !ERROR: 't1' from host is not accessible + type(t1), pointer :: p + end type + !BECAUSE: 't1' is hidden by this entity + type t1 + integer n(2) + end type + type(t2) x + end end module module m2 integer, parameter :: ck = kind('a') From b225934a4b0d2944958a53269665b00e7eae4875 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 15:55:33 -0700 Subject: [PATCH 277/720] [flang] Avoid needless overflow when folding NORM2 (#67499) The code that folds the relatively new NORM2 intrinsic function can produce overflow in cases where it's not warranted. Rearrange to NORM2 = M * SQRT((A(:)/M)**2) where M is MAXVAL(ABS(A)). 
--- flang/lib/Evaluate/fold-real.cpp | 28 ++++++++++++++++++++++------ flang/lib/Evaluate/fold-reduction.h | 2 +- flang/test/Evaluate/fold-norm2.f90 | 13 ++++++++++--- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/flang/lib/Evaluate/fold-real.cpp b/flang/lib/Evaluate/fold-real.cpp index 8e3ab1d8fd30b..6bcc3ec739821 100644 --- a/flang/lib/Evaluate/fold-real.cpp +++ b/flang/lib/Evaluate/fold-real.cpp @@ -52,15 +52,28 @@ template class Norm2Accumulator { const Constant &array, const Constant &maxAbs, Rounding rounding) : array_{array}, maxAbs_{maxAbs}, rounding_{rounding} {}; void operator()(Scalar &element, const ConstantSubscripts &at) { - // Kahan summation of scaled elements + // Kahan summation of scaled elements: + // Naively, + // NORM2(A(:)) = SQRT(SUM(A(:)**2)) + // For any T > 0, we have mathematically + // SQRT(SUM(A(:)**2)) + // = SQRT(T**2 * (SUM(A(:)**2) / T**2)) + // = SQRT(T**2 * SUM(A(:)**2 / T**2)) + // = SQRT(T**2 * SUM((A(:)/T)**2)) + // = SQRT(T**2) * SQRT(SUM((A(:)/T)**2)) + // = T * SQRT(SUM((A(:)/T)**2)) + // By letting T = MAXVAL(ABS(A)), we ensure that + // ALL(ABS(A(:)/T) <= 1), so ALL((A(:)/T)**2 <= 1), and the SUM will + // not overflow unless absolutely necessary. auto scale{maxAbs_.At(maxAbsAt_)}; if (scale.IsZero()) { - // If maxAbs is zero, so are all elements, and result + // Maximum value is zero, and so will the result be. + // Avoid division by zero below. 
element = scale; } else { auto item{array_.At(at)}; auto scaled{item.Divide(scale).value}; - auto square{item.Multiply(scaled).value}; + auto square{scaled.Multiply(scaled).value}; auto next{square.Add(correction_, rounding_)}; overflow_ |= next.flags.test(RealFlag::Overflow); auto sum{element.Add(next.value, rounding_)}; @@ -73,13 +86,16 @@ template class Norm2Accumulator { } bool overflow() const { return overflow_; } void Done(Scalar &result) { + // result+correction == SUM((data(:)/maxAbs)**2) + // result = maxAbs * SQRT(result+correction) auto corrected{result.Add(correction_, rounding_)}; overflow_ |= corrected.flags.test(RealFlag::Overflow); correction_ = Scalar{}; - auto rescaled{corrected.value.Multiply(maxAbs_.At(maxAbsAt_))}; + auto root{corrected.value.SQRT().value}; + auto product{root.Multiply(maxAbs_.At(maxAbsAt_))}; maxAbs_.IncrementSubscripts(maxAbsAt_); - overflow_ |= rescaled.flags.test(RealFlag::Overflow); - result = rescaled.value.SQRT().value; + overflow_ |= product.flags.test(RealFlag::Overflow); + result = product.value; } private: diff --git a/flang/lib/Evaluate/fold-reduction.h b/flang/lib/Evaluate/fold-reduction.h index cff7f54c60d91..0dd55124e6a51 100644 --- a/flang/lib/Evaluate/fold-reduction.h +++ b/flang/lib/Evaluate/fold-reduction.h @@ -228,7 +228,7 @@ template class MaxvalMinvalAccumulator { test.Rewrite(context_, std::move(test)))}; CHECK(folded.has_value()); if (folded->IsTrue()) { - element = array_.At(at); + element = aAt; } } void Done(Scalar &) const {} diff --git a/flang/test/Evaluate/fold-norm2.f90 b/flang/test/Evaluate/fold-norm2.f90 index 30d5289b5a6e3..370532bafaa13 100644 --- a/flang/test/Evaluate/fold-norm2.f90 +++ b/flang/test/Evaluate/fold-norm2.f90 @@ -17,13 +17,20 @@ module m real(dp), parameter :: a(3,4) = & reshape([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], shape(a)) real(dp), parameter :: nAll = norm2(a) - real(dp), parameter :: check_nAll = sqrt(sum(a * a)) + real(dp), parameter :: check_nAll = 11._dp * 
sqrt(sum((a/11._dp)**2)) logical, parameter :: test_all = nAll == check_nAll real(dp), parameter :: norms1(4) = norm2(a, dim=1) - real(dp), parameter :: check_norms1(4) = sqrt(sum(a * a, dim=1)) + real(dp), parameter :: check_norms1(4) = [ & + 2.236067977499789805051477742381393909454345703125_8, & + 7.07106781186547550532850436866283416748046875_8, & + 1.2206555615733702069292121450416743755340576171875e1_8, & + 1.7378147196982769884243680280633270740509033203125e1_8 ] logical, parameter :: test_norms1 = all(norms1 == check_norms1) real(dp), parameter :: norms2(3) = norm2(a, dim=2) - real(dp), parameter :: check_norms2(3) = sqrt(sum(a * a, dim=2)) + real(dp), parameter :: check_norms2(3) = [ & + 1.1224972160321822656214862945489585399627685546875e1_8, & + 1.28840987267251261272349438513629138469696044921875e1_8, & + 1.4628738838327791427218471653759479522705078125e1_8 ] logical, parameter :: test_norms2 = all(norms2 == check_norms2) logical, parameter :: test_normZ = norm2([0.,0.,0.]) == 0. 
end From ff1329e29709477472a93e9ce975f166f75999a3 Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov <87100199+kstoimenov@users.noreply.github.com> Date: Mon, 16 Oct 2023 16:09:44 -0700 Subject: [PATCH 278/720] [HWASAN] Add bcmp interceptor (#69257) --- .../lib/hwasan/hwasan_platform_interceptors.h | 4 +-- compiler-rt/test/hwasan/TestCases/bcmp.cpp | 27 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 compiler-rt/test/hwasan/TestCases/bcmp.cpp diff --git a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h index 390c9d80c38ed..86d26b5ac12d4 100644 --- a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h +++ b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h @@ -68,8 +68,8 @@ // #undef SANITIZER_INTERCEPT_MEMCMP // #define SANITIZER_INTERCEPT_MEMCMP 0 -#undef SANITIZER_INTERCEPT_BCMP -#define SANITIZER_INTERCEPT_BCMP 0 +// #undef SANITIZER_INTERCEPT_BCMP +// #define SANITIZER_INTERCEPT_BCMP 0 #undef SANITIZER_INTERCEPT_STRNDUP #define SANITIZER_INTERCEPT_STRNDUP 0 diff --git a/compiler-rt/test/hwasan/TestCases/bcmp.cpp b/compiler-rt/test/hwasan/TestCases/bcmp.cpp new file mode 100644 index 0000000000000..3dee4b8490efc --- /dev/null +++ b/compiler-rt/test/hwasan/TestCases/bcmp.cpp @@ -0,0 +1,27 @@ +// RUN: %clangxx_hwasan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_hwasan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_hwasan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s + +#include +#include +#include +#include + +int main(int argc, char **argv) { + __hwasan_enable_allocator_tagging(); + char a[] = {static_cast(argc), 2, 3, 4}; + int size = sizeof(a); + char *p = (char *)malloc(size); + memcpy(p, a, size); + free(p); + return bcmp(p, a, size); + // CHECK: HWAddressSanitizer: tag-mismatch on address + // CHECK: READ of size 4 + // CHECK: 
#{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-3]] + // CHECK: Cause: use-after-free + // CHECK: freed by thread + // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-7]] + // CHECK: previously allocated by thread + // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-11]] +} From af972f01c01843a9ffe41ff496154267fa387a51 Mon Sep 17 00:00:00 2001 From: Tai Ly Date: Mon, 16 Oct 2023 18:10:17 -0500 Subject: [PATCH 279/720] [TOSA] Add StatefulOps to TOSA Dialect (#66843) This patch adds tosa.variable, tosa.variable.read and tosa.variable.write operators and tests. Change-Id: I647e2e5c3762d7890b03f6aa7c09a29198b7d355 --------- Signed-off-by: Jerry Ge Co-authored-by: Jerry Ge --- .../Conversion/TosaToLinalg/TosaToLinalg.h | 4 +- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h | 5 + .../mlir/Dialect/Tosa/IR/TosaUtilOps.td | 67 ++++++++++++++ .../mlir/Dialect/Tosa/Transforms/Passes.h | 3 - .../mlir/Dialect/Tosa/Transforms/Passes.td | 3 +- .../TosaToLinalg/TosaToLinalgPass.cpp | 5 +- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 43 +++++++++ .../Tosa/Transforms/TosaValidation.cpp | 92 +++++++++++++++++-- mlir/test/Dialect/Tosa/invalid.mlir | 45 +++++++++ mlir/test/Dialect/Tosa/variables.mlir | 33 +++++++ 10 files changed, 281 insertions(+), 19 deletions(-) create mode 100644 mlir/test/Dialect/Tosa/variables.mlir diff --git a/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h b/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h index d8d4027500f99..c411010603ac6 100644 --- a/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h +++ b/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h @@ -35,8 +35,8 @@ std::unique_ptr createTosaToLinalgNamed(); void addTosaToLinalgPasses( OpPassManager &pm, const TosaToLinalgOptions &options, // Note: Default to 'none' level unless otherwise specified. 
- tosa::ValidationOptions const &validationOptions = - tosa::ValidationOptions().setLevel(tosa::TosaLevelEnum::None)); + tosa::TosaValidationOptions const &validationOptions = { + tosa::TosaProfileEnum::Undefined, false, tosa::TosaLevelEnum::None}); /// Populates conversion passes from TOSA dialect to Linalg dialect. void populateTosaToLinalgConversionPatterns(RewritePatternSet *patterns); diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h index 555d9bea18ba4..a9bc3351f4cff 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h @@ -34,6 +34,11 @@ class PatternRewriter; namespace tosa { +ParseResult parseTypeOrAttr(OpAsmParser &parser, TypeAttr &typeAttr, + Attribute &attr); +void printTypeOrAttr(OpAsmPrinter &p, Operation *op, TypeAttr type, + Attribute attr); + #include "mlir/Dialect/Tosa/IR/TosaInterfaces.h.inc" } // namespace tosa diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td index d75f5dffa8716..f9f25da1b649d 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td @@ -79,4 +79,71 @@ def Tosa_YieldOp : Tosa_Op<"yield", [ let assemblyFormat = "$inputs attr-dict `:` type($inputs)"; } +//===----------------------------------------------------------------------===// +// Operator: variable +//===----------------------------------------------------------------------===// +def Tosa_VariableOp : Tosa_Op<"variable", []> { + let summary = "Defines a variable"; + + let description = [{ + Defines a new TOSA variable. This is a mutable value. + Modifications are expressed using read/write semantics. 
+ }]; + + let arguments = (ins + SymbolNameAttr:$name, + TypeAttr:$type, + OptionalAttr:$initial_value + ); + + let assemblyFormat = [{ + $name + attr-dict + custom($type, $initial_value) + }]; +} + +//===----------------------------------------------------------------------===// +// Operator: variable.write +//===----------------------------------------------------------------------===// +def Tosa_VariableWriteOp : Tosa_Op<"variable.write", []> { + let summary = "write_buffer operator"; + + let description = [{ + Assigns a value to pseudo-buffer resource holding a mutable tensor. + }]; + + let arguments = (ins + SymbolNameAttr:$name, + AnyType:$value + ); + + let assemblyFormat = [{ + $name attr-dict `,` $value `:` type($value) + }]; +} + +//===----------------------------------------------------------------------===// +// Operator: variable.read +//===----------------------------------------------------------------------===// +def Tosa_VariableReadOp : Tosa_Op<"variable.read", []> { + let summary = "read_buffer operator"; + + let description = [{ + Reads the value from a pseudo-buffer resource holding a mutable tensor. 
+ }]; + + let arguments = (ins + SymbolNameAttr:$name + ); + + let results = (outs + AnyType:$value + ); + + let assemblyFormat = [{ + $name attr-dict `:` type($value) + }]; +} + #endif // TOSA_UTIL_OPS diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h index 940aed107e2f9..fbfc56dfe2cf4 100644 --- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h @@ -68,9 +68,6 @@ struct ValidationOptions { } }; -std::unique_ptr createTosaValidationPass( - ValidationOptions const &options = ValidationOptions()); - #define GEN_PASS_REGISTRATION #include "mlir/Dialect/Tosa/Transforms/Passes.h.inc" diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td index ac100a6d75c7c..a0f670de20150 100644 --- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td @@ -89,13 +89,12 @@ def TosaLevelType : I32EnumAttr<"TosaLevelEnum", "Tosa level", let cppNamespace = "mlir::tosa"; } -def TosaValidation : Pass<"tosa-validate", "func::FuncOp"> { +def TosaValidation : Pass<"tosa-validate", "mlir::ModuleOp"> { let summary = "Validates TOSA dialect"; let description = [{ This pass validates if input TOSA operations match the specification for given criteria, e.g. TOSA profile. 
}]; - let constructor = "createTosaValidationPass()"; let options = [ Option<"profile", "profile", "mlir::tosa::TosaProfileEnum", diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp index 718e34ced8d7e..3c54f85b033b0 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp @@ -76,7 +76,7 @@ std::unique_ptr mlir::tosa::createTosaToLinalg() { void mlir::tosa::addTosaToLinalgPasses( OpPassManager &pm, const TosaToLinalgOptions &options, - tosa::ValidationOptions const &validationOptions) { + tosa::TosaValidationOptions const &validationOptions) { // Optional decompositions are designed to benefit linalg. if (!options.disableTosaDecompositions) pm.addNestedPass(tosa::createTosaOptionalDecompositions()); @@ -90,7 +90,6 @@ void mlir::tosa::addTosaToLinalgPasses( pm.addNestedPass(tosa::createTosaLayerwiseConstantFoldPass( {options.aggressiveReduceConstant})); pm.addNestedPass(tosa::createTosaMakeBroadcastablePass()); - pm.addNestedPass( - tosa::createTosaValidationPass(validationOptions)); + pm.addNestedPass(tosa::createTosaValidation(validationOptions)); pm.addNestedPass(tosa::createTosaToLinalg()); } diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 6db04fe38bcd3..ff34183f9a030 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -146,6 +146,49 @@ Operation *TosaDialect::materializeConstant(OpBuilder &builder, Attribute value, return nullptr; } +//===----------------------------------------------------------------------===// +// Parsers and printers +//===----------------------------------------------------------------------===// + +ParseResult mlir::tosa::parseTypeOrAttr(OpAsmParser &parser, TypeAttr &typeAttr, + Attribute &attr) { + if (succeeded(parser.parseOptionalEqual())) { + if (failed(parser.parseAttribute(attr))) { + return 
parser.emitError(parser.getCurrentLocation()) + << "expected attribute"; + } + if (auto typedAttr = attr.dyn_cast()) { + typeAttr = TypeAttr::get(typedAttr.getType()); + } + return success(); + } + + Type type; + if (failed(parser.parseColonType(type))) { + return parser.emitError(parser.getCurrentLocation()) << "expected type"; + } + typeAttr = TypeAttr::get(type); + + return success(); +} + +void mlir::tosa::printTypeOrAttr(OpAsmPrinter &p, Operation *op, TypeAttr type, + Attribute attr) { + bool needsSpace = false; + auto typedAttr = attr.dyn_cast_or_null(); + if (!typedAttr || typedAttr.getType() != type.getValue()) { + p << ": "; + p.printAttribute(type); + needsSpace = true; // subsequent attr value needs a space separator + } + if (attr) { + if (needsSpace) + p << ' '; + p << "= "; + p.printAttribute(attr); + } +} + //===----------------------------------------------------------------------===// // TOSA Operator Verifiers. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp index 52885e69c3924..d686ce125c135 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp @@ -14,6 +14,9 @@ #include "mlir/Dialect/Tosa/Transforms/Passes.h" #include "mlir/Dialect/Tosa/Transforms/PassesEnums.cpp.inc" +#include +#include + #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/IR/Builders.h" @@ -96,12 +99,13 @@ static constexpr tosa_level_t TOSA_LEVEL_NONE = {0, 0, 0, 0}; struct TosaValidation : public tosa::impl::TosaValidationBase { public: explicit TosaValidation() { populateConstantOperandChecks(); } - explicit TosaValidation(const ValidationOptions &options) : TosaValidation() { + explicit TosaValidation(const TosaValidationOptions &options) + : TosaValidation() { this->profile = options.profile; - 
this->StrictOperationSpecAlignment = options.strictOperationSpecAlignment; + this->StrictOperationSpecAlignment = options.StrictOperationSpecAlignment; this->level = options.level; } - void runOnOperation() override; + void runOnOperation() final; LogicalResult applyConstantOperandCheck(Operation *op) { for (auto &checker : const_checkers) { @@ -113,6 +117,9 @@ struct TosaValidation : public tosa::impl::TosaValidationBase { LogicalResult applyLevelCheck(Operation *op); + // check variable read/write data types against variable declarations + LogicalResult applyVariableCheck(Operation *op); + private: void populateConstantOperandChecks() { const_checkers.emplace_back(checkConstantOperandPad); @@ -398,8 +405,12 @@ struct TosaValidation : public tosa::impl::TosaValidationBase { } } + bool CheckVariable(Operation *op); + bool CheckVariableReadOrWrite(Operation *op); + SmallVector> const_checkers; tosa_level_t tosa_level; + DenseMap variables_map; }; LogicalResult TosaValidation::applyLevelCheck(Operation *op) { @@ -427,6 +438,69 @@ LogicalResult TosaValidation::applyLevelCheck(Operation *op) { return success(); } +inline bool CompatibleTypes(const mlir::Type &type, + const mlir::Type &declared_type) { + // for now, simply use type equality comparison + return type == declared_type; +} + +bool TosaValidation::CheckVariable(Operation *op) { + if (isa(op)) { + auto name_attr = cast(op->getAttr("name")); + + if (variables_map.count(&name_attr)) { + op->emitOpError() << "name has already been declared"; + return false; + } + + auto type_attr = cast(op->getAttr("type")); + mlir::Type type = type_attr.getValue(); + + variables_map[&name_attr] = type; + } + + return true; +} + +bool TosaValidation::CheckVariableReadOrWrite(Operation *op) { + if (isa(op) || + isa(op)) { + auto name_attr = cast(op->getAttr("name")); + + if (!variables_map.count(&name_attr)) { + op->emitOpError() << "name has not been declared"; + return false; + } + + auto var_type = variables_map[&name_attr]; + 
+ for (auto v : op->getOperands()) { + auto type = v.getType(); + if (!CompatibleTypes(type, var_type)) { + op->emitOpError() << "operand type does not equal variable type"; + return false; + } + } + + for (auto v : op->getResults()) { + auto type = v.getType(); + if (!CompatibleTypes(type, var_type)) { + op->emitOpError() << "result type does not equal variable type"; + return false; + } + } + } + + return true; +} + +LogicalResult TosaValidation::applyVariableCheck(Operation *op) { + if (!CheckVariable(op) || !CheckVariableReadOrWrite(op)) { + return failure(); + } + return success(); +} + void TosaValidation::runOnOperation() { configLevelAndProfile(); getOperation().walk([&](Operation *op) { @@ -440,18 +514,18 @@ void TosaValidation::runOnOperation() { } } - // Some uses of TOSA rely on the constant operands of particular operations. + // Some uses of TOSA rely on the constant operands of particular + // operations. if (StrictOperationSpecAlignment && failed(applyConstantOperandCheck(op))) signalPassFailure(); // do level checks if (failed(applyLevelCheck(op))) signalPassFailure(); + + // do variable type checks + if (failed(applyVariableCheck(op))) + signalPassFailure(); }); } } // namespace - -std::unique_ptr -mlir::tosa::createTosaValidationPass(ValidationOptions const &options) { - return std::make_unique(options); -} diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index 7c58bb10b9c5e..9233662e88db9 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -203,3 +203,48 @@ func.func @test_avg_pool2d_zero_dim_input(%arg0: tensor<1x0x?x9xf32>) -> tensor< : (tensor<1x0x?x9xf32>) -> tensor<1x7x7x9xf32> return %0 : tensor<1x7x7x9xf32> } + +// ----- + +func.func @test_variable_duplicates(%arg0: tensor<2x4x8xi32>) -> () { + tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> + // expected-error@+1 {{'tosa.variable' op name has already been declared}} + tosa.variable @stored_var : 
tensor<1x4x8xi32> + return +} + +// ----- + +func.func @test_variable_read_type(%arg0: tensor<2x4x8xi32>) -> () { + tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> + // expected-error@+1 {{'tosa.variable.read' op result type does not equal variable type}} + %0 = tosa.variable.read @stored_var : tensor<2x4x8xi16> + return +} + +// ----- + +func.func @test_variable_read_shape(%arg0: tensor<2x4x8xi32>) -> () { + tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> + // expected-error@+1 {{'tosa.variable.read' op result type does not equal variable type}} + %0 = tosa.variable.read @stored_var : tensor<1x4x8xi32> + return +} + +// ----- + +func.func @test_variable_write_type(%arg0: tensor<2x4x8xi16>) -> () { + tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> + // expected-error@+1 {{'tosa.variable.write' op operand type does not equal variable type}} + tosa.variable.write @stored_var, %arg0 : tensor<2x4x8xi16> + return +} + +// ----- + +func.func @test_variable_write_shape(%arg0: tensor<1x4x8xi32>) -> () { + tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> + // expected-error@+1 {{'tosa.variable.write' op operand type does not equal variable type}} + tosa.variable.write @stored_var, %arg0 : tensor<1x4x8xi32> + return +} diff --git a/mlir/test/Dialect/Tosa/variables.mlir b/mlir/test/Dialect/Tosa/variables.mlir new file mode 100644 index 0000000000000..9a26aa0bc8bf4 --- /dev/null +++ b/mlir/test/Dialect/Tosa/variables.mlir @@ -0,0 +1,33 @@ +// RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt %s --mlir-print-op-generic | mlir-opt | FileCheck %s + + +// ----- +// CHECK-LABEL: @test_variable_scalar( +// CHECK-SAME: %[[ADD_VAL:.*]]: tensor) { +func.func @test_variable_scalar(%arg0: tensor) -> () { + // CHECK: tosa.variable @stored_var = dense<3.140000e+00> : tensor + tosa.variable @stored_var = dense<3.14> : tensor + // CHECK: %[[STORED_VAL:.*]] = tosa.variable.read @stored_var : tensor + %0 = tosa.variable.read @stored_var : 
tensor + // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor, tensor) -> tensor + %1 = "tosa.add"(%arg0, %0) : (tensor, tensor) -> tensor + // CHECK: tosa.variable.write @stored_var, %[[RESULT_ADD]] : tensor + tosa.variable.write @stored_var, %1 : tensor + return +} + +// ----- +// CHECK-LABEL: @test_variable_tensor( +// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) { +func.func @test_variable_tensor(%arg0: tensor<2x4x8xi32>) -> () { + // CHECK: tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> + tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> + // CHECK: %[[STORED_VAL:.*]] = tosa.variable.read @stored_var : tensor<2x4x8xi32> + %0 = tosa.variable.read @stored_var : tensor<2x4x8xi32> + // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> + %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> + // CHECK: tosa.variable.write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32> + tosa.variable.write @stored_var, %1 : tensor<2x4x8xi32> + return +} From e35cb730cfd30912a2ffbcac9db1014a80a6c4c8 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 16:10:44 -0700 Subject: [PATCH 280/720] [flang][runtime] Fix edge cases with ROUND=UP/DOWN (#67508) When an unrepresentable nonzero real input value with a very small exponent is currently being read in as zero, don't neglect ROUND=UP/DOWN; return the least nonzero subnormal value instead when appropriate. 
--- flang/lib/Decimal/binary-to-decimal.cpp | 3 ++- flang/lib/Decimal/decimal-to-binary.cpp | 30 +++++++++++++++++-------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp index 7b31d02b292e4..55fc548a6979b 100644 --- a/flang/lib/Decimal/binary-to-decimal.cpp +++ b/flang/lib/Decimal/binary-to-decimal.cpp @@ -373,7 +373,8 @@ STREAM &BigRadixFloatingPointNumber::Dump(STREAM &o) const { if (isNegative_) { o << '-'; } - o << "10**(" << exponent_ << ") * ...\n"; + o << "10**(" << exponent_ << ") * ... (rounding " + << static_cast(rounding_) << ")\n"; for (int j{digits_}; --j >= 0;) { std::string str{std::to_string(digit_[j])}; o << std::string(20 - str.size(), ' ') << str << " [" << j << ']'; diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp index c8c7b23329e00..d5b66b9fb9338 100644 --- a/flang/lib/Decimal/decimal-to-binary.cpp +++ b/flang/lib/Decimal/decimal-to-binary.cpp @@ -257,13 +257,20 @@ ConversionToBinaryResult IntermediateFloat::ToBinary( flags |= Inexact; } if (fraction == 0 && guard <= oneHalf) { - return {Binary{}, static_cast(flags)}; - } - // The value is nonzero; normalize it. - while (fraction < topBit && expo > 1) { - --expo; - fraction = fraction * 2 + (guard >> (guardBits - 2)); - guard = (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); + if ((!isNegative && rounding == RoundUp) || + (isNegative && rounding == RoundDown)) { + // round to minimum nonzero value + } else { + return {Binary{}, static_cast(flags)}; + } + } else { + // The value is nonzero; normalize it. 
+ while (fraction < topBit && expo > 1) { + --expo; + fraction = fraction * 2 + (guard >> (guardBits - 2)); + guard = + (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); + } } // Apply rounding bool incr{false}; @@ -330,8 +337,13 @@ BigRadixFloatingPointNumber::ConvertToBinary() { exponent_ += digits_ * log10Radix; // Sanity checks for ridiculous exponents static constexpr int crazy{2 * Real::decimalRange + log10Radix}; - if (exponent_ < -crazy) { // underflow to +/-0. - return {Real{SignBit()}, Inexact}; + if (exponent_ < -crazy) { + if ((!isNegative_ && rounding_ == RoundUp) || + (isNegative_ && rounding_ == RoundDown)) { + return {Real{Raw{1} | SignBit()}}; // return least nonzero value + } else { // underflow to +/-0. + return {Real{SignBit()}, Inexact}; + } } else if (exponent_ > crazy) { // overflow to +/-Inf. return {Real{Infinity()}, Overflow}; } From 910a4bf5b70ae14e7262677a8880ee98056e44e1 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov <6532716+alexander-shaposhnikov@users.noreply.github.com> Date: Mon, 16 Oct 2023 16:12:33 -0700 Subject: [PATCH 281/720] [compiler-rt] Implement __extendxftf2 and __trunctfxf2 for x86_64 (#66918) This patch implements __extendxftf2 (long double -> f128) and __trunctfxf2 (f128 -> long double) on x86_64. This is a preparation to unblock https://reviews.llvm.org/D53608, We intentionally do not modify compiler-rt/lib/builtins/fp_lib.h in this PR (in particular, to limit the scope and avoid exposing other functions on X86_64 in this PR). Instead, TODOs were added to use fp_lib.h once it is available. Test plan: 1. ninja check-compiler-rt (verified on X86_64 and on Aarch64) In particular, new tests (extendxftf2_test.c and trunctfxf2_test.c) were added. 2. compared the results of conversions with what other compilers (gcc) produce. 
--- compiler-rt/lib/builtins/CMakeLists.txt | 2 + compiler-rt/lib/builtins/extendxftf2.c | 23 ++++ compiler-rt/lib/builtins/fp_extend.h | 92 +++++++++++-- compiler-rt/lib/builtins/fp_extend_impl.inc | 83 ++++++------ compiler-rt/lib/builtins/fp_trunc.h | 83 ++++++++++-- compiler-rt/lib/builtins/fp_trunc_impl.inc | 122 ++++++++++-------- compiler-rt/lib/builtins/trunctfxf2.c | 24 ++++ compiler-rt/test/builtins/Unit/addtf3_test.c | 2 +- compiler-rt/test/builtins/Unit/divtf3_test.c | 2 +- .../test/builtins/Unit/extenddftf2_test.c | 2 +- .../test/builtins/Unit/extendhftf2_test.c | 2 +- .../test/builtins/Unit/extendsftf2_test.c | 2 +- .../test/builtins/Unit/extendxftf2_test.c | 74 +++++++++++ .../test/builtins/Unit/floatditf_test.c | 2 +- .../test/builtins/Unit/floatsitf_test.c | 2 +- .../test/builtins/Unit/floatunditf_test.c | 2 +- .../test/builtins/Unit/floatunsitf_test.c | 2 +- compiler-rt/test/builtins/Unit/fp_test.h | 93 +++++++++---- compiler-rt/test/builtins/Unit/multf3_test.c | 2 +- compiler-rt/test/builtins/Unit/subtf3_test.c | 2 +- .../test/builtins/Unit/trunctfxf2_test.c | 97 ++++++++++++++ 21 files changed, 564 insertions(+), 151 deletions(-) create mode 100644 compiler-rt/lib/builtins/extendxftf2.c create mode 100644 compiler-rt/lib/builtins/trunctfxf2.c create mode 100644 compiler-rt/test/builtins/Unit/extendxftf2_test.c create mode 100644 compiler-rt/test/builtins/Unit/trunctfxf2_test.c diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 753d08273ea54..4f210a5c0fef9 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -280,6 +280,7 @@ endif () # long double is not 80 bits on Android or MSVC. 
set(x86_80_BIT_SOURCES divxc3.c + extendxftf2.c fixxfdi.c fixxfti.c fixunsxfdi.c @@ -291,6 +292,7 @@ set(x86_80_BIT_SOURCES floatuntixf.c mulxc3.c powixf2.c + trunctfxf2.c ) if (NOT MSVC) diff --git a/compiler-rt/lib/builtins/extendxftf2.c b/compiler-rt/lib/builtins/extendxftf2.c new file mode 100644 index 0000000000000..20911fe7cf2a0 --- /dev/null +++ b/compiler-rt/lib/builtins/extendxftf2.c @@ -0,0 +1,23 @@ +//===-- lib/extendxftf2.c - long double -> quad conversion --------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Assumption: long double is a IEEE 80 bit floating point type padded to 128 +// bits. + +// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64. +#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) && \ + (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)) +#define SRC_80 +#define DST_QUAD +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI __float128 __extendxftf2(long double a) { + return __extendXfYf2__(a); +} + +#endif diff --git a/compiler-rt/lib/builtins/fp_extend.h b/compiler-rt/lib/builtins/fp_extend.h index eee4722bf90e6..86b32be12d55f 100644 --- a/compiler-rt/lib/builtins/fp_extend.h +++ b/compiler-rt/lib/builtins/fp_extend.h @@ -20,15 +20,22 @@ typedef float src_t; typedef uint32_t src_rep_t; #define SRC_REP_C UINT32_C -static const int srcSigBits = 23; +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 23; +// -1 accounts for the sign bit. 
+static const int srcExpBits = srcBits - srcSigFracBits - 1; #define src_rep_t_clz clzsi #elif defined SRC_DOUBLE typedef double src_t; typedef uint64_t src_rep_t; #define SRC_REP_C UINT64_C -static const int srcSigBits = 52; -static __inline int src_rep_t_clz(src_rep_t a) { +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 52; +// -1 accounts for the sign bit. +static const int srcExpBits = srcBits - srcSigFracBits - 1; + +static inline int src_rep_t_clz_impl(src_rep_t a) { #if defined __LP64__ return __builtin_clzl(a); #else @@ -38,6 +45,18 @@ static __inline int src_rep_t_clz(src_rep_t a) { return 32 + clzsi(a & REP_C(0xffffffff)); #endif } +#define src_rep_t_clz src_rep_t_clz_impl + +#elif defined SRC_80 +typedef long double src_t; +typedef __uint128_t src_rep_t; +#define SRC_REP_C (__uint128_t) +// sign bit, exponent and significand occupy the lower 80 bits. +static const int srcBits = 80; +static const int srcSigFracBits = 63; +// -1 accounts for the sign bit. +// -1 accounts for the explicitly stored integer bit. +static const int srcExpBits = srcBits - srcSigFracBits - 1 - 1; #elif defined SRC_HALF #ifdef COMPILER_RT_HAS_FLOAT16 @@ -47,7 +66,11 @@ typedef uint16_t src_t; #endif typedef uint16_t src_rep_t; #define SRC_REP_C UINT16_C -static const int srcSigBits = 10; +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 10; +// -1 accounts for the sign bit. +static const int srcExpBits = srcBits - srcSigFracBits - 1; + #define src_rep_t_clz __builtin_clz #else @@ -58,28 +81,75 @@ static const int srcSigBits = 10; typedef float dst_t; typedef uint32_t dst_rep_t; #define DST_REP_C UINT32_C -static const int dstSigBits = 23; +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 23; +// -1 accounts for the sign bit. 
+static const int dstExpBits = dstBits - dstSigFracBits - 1; #elif defined DST_DOUBLE typedef double dst_t; typedef uint64_t dst_rep_t; #define DST_REP_C UINT64_C -static const int dstSigBits = 52; +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 52; +// -1 accounts for the sign bit. +static const int dstExpBits = dstBits - dstSigFracBits - 1; #elif defined DST_QUAD +// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64. +#if __LDBL_MANT_DIG__ == 113 typedef long double dst_t; +#elif defined(__x86_64__) && \ + (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)) +typedef __float128 dst_t; +#endif typedef __uint128_t dst_rep_t; #define DST_REP_C (__uint128_t) -static const int dstSigBits = 112; +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 112; +// -1 accounts for the sign bit. +static const int dstExpBits = dstBits - dstSigFracBits - 1; #else #error Destination should be single, double, or quad precision! #endif // end destination precision -// End of specialization parameters. Two helper routines for conversion to and -// from the representation of floating-point data as integer values follow. +// End of specialization parameters. + +// TODO: These helper routines should be placed into fp_lib.h +// Currently they depend on macros/constants defined above. 
+ +static inline src_rep_t extract_sign_from_src(src_rep_t x) { + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1); + return (x & srcSignMask) >> (srcBits - 1); +} + +static inline src_rep_t extract_exp_from_src(src_rep_t x) { + const int srcSigBits = srcBits - 1 - srcExpBits; + const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits; + return (x & srcExpMask) >> srcSigBits; +} + +static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) { + const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1; + return x & srcSigFracMask; +} + +#ifdef src_rep_t_clz +static inline int clz_in_sig_frac(src_rep_t sigFrac) { + const int skip = (sizeof(dst_t) * CHAR_BIT - srcBits) + 1 + srcExpBits; + return src_rep_t_clz(sigFrac) - skip; +} +#endif + +static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) { + return (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac; +} + +// Two helper routines for conversion to and from the representation of +// floating-point data as integer values follow. -static __inline src_rep_t srcToRep(src_t x) { +static inline src_rep_t srcToRep(src_t x) { const union { src_t f; src_rep_t i; @@ -87,7 +157,7 @@ static __inline src_rep_t srcToRep(src_t x) { return rep.i; } -static __inline dst_t dstFromRep(dst_rep_t x) { +static inline dst_t dstFromRep(dst_rep_t x) { const union { dst_t f; dst_rep_t i; diff --git a/compiler-rt/lib/builtins/fp_extend_impl.inc b/compiler-rt/lib/builtins/fp_extend_impl.inc index d1c9c02a00c53..e16b55d150d2e 100644 --- a/compiler-rt/lib/builtins/fp_extend_impl.inc +++ b/compiler-rt/lib/builtins/fp_extend_impl.inc @@ -37,71 +37,72 @@ #include "fp_extend.h" +// The source type may use a usual IEEE-754 interchange format or Intel 80-bit +// format. In particular, for the source type srcSigFracBits may be not equal to +// srcSigBits. The destination type is assumed to be one of IEEE-754 standard +// types. 
static __inline dst_t __extendXfYf2__(src_t a) { // Various constants whose values follow from the type parameters. // Any reasonable optimizer will fold and propagate all of these. - const int srcBits = sizeof(src_t) * CHAR_BIT; - const int srcExpBits = srcBits - srcSigBits - 1; const int srcInfExp = (1 << srcExpBits) - 1; const int srcExpBias = srcInfExp >> 1; - const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; - const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; - const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); - const src_rep_t srcAbsMask = srcSignMask - 1; - const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); - const src_rep_t srcNaNCode = srcQNaN - 1; - - const int dstBits = sizeof(dst_t) * CHAR_BIT; - const int dstExpBits = dstBits - dstSigBits - 1; const int dstInfExp = (1 << dstExpBits) - 1; const int dstExpBias = dstInfExp >> 1; - const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; - // Break a into a sign and representation of the absolute value. const src_rep_t aRep = srcToRep(a); - const src_rep_t aAbs = aRep & srcAbsMask; - const src_rep_t sign = aRep & srcSignMask; - dst_rep_t absResult; + const src_rep_t srcSign = extract_sign_from_src(aRep); + const src_rep_t srcExp = extract_exp_from_src(aRep); + const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep); + + dst_rep_t dstSign = srcSign; + dst_rep_t dstExp; + dst_rep_t dstSigFrac; - // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted - // to (signed) int. To avoid that, explicitly cast to src_rep_t. - if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) { + if (srcExp >= 1 && srcExp < srcInfExp) { // a is a normal number. - // Extend to the destination type by shifting the significand and - // exponent into the proper position and rebiasing the exponent. 
- absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits); - absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits; + dstExp = (dst_rep_t)srcExp + (dst_rep_t)(dstExpBias - srcExpBias); + dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits); } - else if (aAbs >= srcInfinity) { + else if (srcExp == srcInfExp) { // a is NaN or infinity. - // Conjure the result by beginning with infinity, then setting the qNaN - // bit (if needed) and right-aligning the rest of the trailing NaN - // payload field. - absResult = (dst_rep_t)dstInfExp << dstSigBits; - absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); - absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); + dstExp = dstInfExp; + dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits); } - else if (aAbs) { + else if (srcSigFrac) { // a is denormal. - // renormalize the significand and clear the leading bit, then insert - // the correct adjusted exponent in the destination type. - const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal); - absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale); - absResult ^= dstMinNormal; - const int resultExponent = dstExpBias - srcExpBias - scale + 1; - absResult |= (dst_rep_t)resultExponent << dstSigBits; + if (srcExpBits == dstExpBits) { + // The exponent fields are identical and this is a denormal number, so all + // the non-significand bits are zero. In particular, this branch is always + // taken when we extend a denormal F80 to F128. + dstExp = 0; + dstSigFrac = ((dst_rep_t)srcSigFrac) << (dstSigFracBits - srcSigFracBits); + } else { +#ifndef src_rep_t_clz + // If src_rep_t_clz is not defined this branch must be unreachable. + __builtin_unreachable(); +#else + // Renormalize the significand and clear the leading bit. + // For F80 -> F128 this codepath is unused. 
+ const int scale = clz_in_sig_frac(srcSigFrac) + 1; + dstExp = dstExpBias - srcExpBias - scale + 1; + dstSigFrac = (dst_rep_t)srcSigFrac + << (dstSigFracBits - srcSigFracBits + scale); + const dst_rep_t dstMinNormal = DST_REP_C(1) << (dstBits - 1 - dstExpBits); + dstSigFrac ^= dstMinNormal; +#endif + } } else { // a is zero. - absResult = 0; + dstExp = 0; + dstSigFrac = 0; } - // Apply the signbit to the absolute value. - const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits); + const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac); return dstFromRep(result); } diff --git a/compiler-rt/lib/builtins/fp_trunc.h b/compiler-rt/lib/builtins/fp_trunc.h index 91f614528ab3f..ea13dc2efae54 100644 --- a/compiler-rt/lib/builtins/fp_trunc.h +++ b/compiler-rt/lib/builtins/fp_trunc.h @@ -19,19 +19,34 @@ typedef float src_t; typedef uint32_t src_rep_t; #define SRC_REP_C UINT32_C -static const int srcSigBits = 23; +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 23; +// -1 accounts for the sign bit. +static const int srcExpBits = srcBits - srcSigFracBits - 1; #elif defined SRC_DOUBLE typedef double src_t; typedef uint64_t src_rep_t; #define SRC_REP_C UINT64_C -static const int srcSigBits = 52; +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 52; +// -1 accounts for the sign bit. +static const int srcExpBits = srcBits - srcSigFracBits - 1; #elif defined SRC_QUAD +// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64. +#if __LDBL_MANT_DIG__ == 113 typedef long double src_t; +#elif defined(__x86_64__) && \ + (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)) +typedef __float128 src_t; +#endif typedef __uint128_t src_rep_t; #define SRC_REP_C (__uint128_t) -static const int srcSigBits = 112; +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 112; +// -1 accounts for the sign bit. 
+static const int srcExpBits = srcBits - srcSigFracBits - 1; #else #error Source should be double precision or quad precision! @@ -41,13 +56,29 @@ static const int srcSigBits = 112; typedef double dst_t; typedef uint64_t dst_rep_t; #define DST_REP_C UINT64_C -static const int dstSigBits = 52; +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 52; +// -1 accounts for the sign bit. +static const int dstExpBits = dstBits - dstSigFracBits - 1; + +#elif defined DST_80 +typedef long double dst_t; +typedef __uint128_t dst_rep_t; +#define DST_REP_C (__uint128_t) +static const int dstBits = 80; +static const int dstSigFracBits = 63; +// -1 accounts for the sign bit. +// -1 accounts for the explicitly stored integer bit. +static const int dstExpBits = dstBits - dstSigFracBits - 1 - 1; #elif defined DST_SINGLE typedef float dst_t; typedef uint32_t dst_rep_t; #define DST_REP_C UINT32_C -static const int dstSigBits = 23; +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 23; +// -1 accounts for the sign bit. +static const int dstExpBits = dstBits - dstSigFracBits - 1; #elif defined DST_HALF #ifdef COMPILER_RT_HAS_FLOAT16 @@ -57,22 +88,56 @@ typedef uint16_t dst_t; #endif typedef uint16_t dst_rep_t; #define DST_REP_C UINT16_C -static const int dstSigBits = 10; +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 10; +// -1 accounts for the sign bit. +static const int dstExpBits = dstBits - dstSigFracBits - 1; #elif defined DST_BFLOAT typedef __bf16 dst_t; typedef uint16_t dst_rep_t; #define DST_REP_C UINT16_C -static const int dstSigBits = 7; +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 7; +// -1 accounts for the sign bit. +static const int dstExpBits = dstBits - dstSigFracBits - 1; #else #error Destination should be single precision or double precision! 
#endif // end destination precision +// TODO: These helper routines should be placed into fp_lib.h +// Currently they depend on macros/constants defined above. + +static inline src_rep_t extract_sign_from_src(src_rep_t x) { + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1); + return (x & srcSignMask) >> (srcBits - 1); +} + +static inline src_rep_t extract_exp_from_src(src_rep_t x) { + const int srcSigBits = srcBits - 1 - srcExpBits; + const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits; + return (x & srcExpMask) >> srcSigBits; +} + +static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) { + const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1; + return x & srcSigFracMask; +} + +static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) { + dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac; + // Set the explicit integer bit in F80 if present. + if (dstBits == 80 && exp) { + result |= (DST_REP_C(1) << dstSigFracBits); + } + return result; +} + // End of specialization parameters. Two helper routines for conversion to and // from the representation of floating-point data as integer values follow. -static __inline src_rep_t srcToRep(src_t x) { +static inline src_rep_t srcToRep(src_t x) { const union { src_t f; src_rep_t i; @@ -80,7 +145,7 @@ static __inline src_rep_t srcToRep(src_t x) { return rep.i; } -static __inline dst_t dstFromRep(dst_rep_t x) { +static inline dst_t dstFromRep(dst_rep_t x) { const union { dst_t f; dst_rep_t i; diff --git a/compiler-rt/lib/builtins/fp_trunc_impl.inc b/compiler-rt/lib/builtins/fp_trunc_impl.inc index e235f45965a72..f68492495697f 100644 --- a/compiler-rt/lib/builtins/fp_trunc_impl.inc +++ b/compiler-rt/lib/builtins/fp_trunc_impl.inc @@ -38,102 +38,118 @@ #include "fp_trunc.h" +// The destination type may use a usual IEEE-754 interchange format or Intel +// 80-bit format. 
In particular, for the destination type dstSigFracBits may be +// not equal to dstSigBits. The source type is assumed to be one of IEEE-754 +// standard types. static __inline dst_t __truncXfYf2__(src_t a) { // Various constants whose values follow from the type parameters. // Any reasonable optimizer will fold and propagate all of these. - const int srcBits = sizeof(src_t) * CHAR_BIT; - const int srcExpBits = srcBits - srcSigBits - 1; const int srcInfExp = (1 << srcExpBits) - 1; const int srcExpBias = srcInfExp >> 1; - const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; - const src_rep_t srcSignificandMask = srcMinNormal - 1; - const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; - const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); - const src_rep_t srcAbsMask = srcSignMask - 1; - const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1; - const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1); - const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigFracBits; + const src_rep_t roundMask = + (SRC_REP_C(1) << (srcSigFracBits - dstSigFracBits)) - 1; + const src_rep_t halfway = SRC_REP_C(1) + << (srcSigFracBits - dstSigFracBits - 1); + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigFracBits - 1); const src_rep_t srcNaNCode = srcQNaN - 1; - const int dstBits = sizeof(dst_t) * CHAR_BIT; - const int dstExpBits = dstBits - dstSigBits - 1; const int dstInfExp = (1 << dstExpBits) - 1; const int dstExpBias = dstInfExp >> 1; - - const int underflowExponent = srcExpBias + 1 - dstExpBias; const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; - const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits; - const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits; - const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); + const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigFracBits - 1); const dst_rep_t 
dstNaNCode = dstQNaN - 1; - // Break a into a sign and representation of the absolute value. const src_rep_t aRep = srcToRep(a); - const src_rep_t aAbs = aRep & srcAbsMask; - const src_rep_t sign = aRep & srcSignMask; - dst_rep_t absResult; + const src_rep_t srcSign = extract_sign_from_src(aRep); + const src_rep_t srcExp = extract_exp_from_src(aRep); + const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep); + + dst_rep_t dstSign = srcSign; + dst_rep_t dstExp; + dst_rep_t dstSigFrac; - const int tailBits = srcBits - dstBits; - if (srcExpBits == dstExpBits && ((aRep >> tailBits) << tailBits) == aRep) { - // Same size exponents and a's significand tail is 0. Remove tail. - dst_rep_t result = aRep >> tailBits; - return dstFromRep(result); + // Same size exponents and a's significand tail is 0. + // The significand can be truncated and the exponent can be copied over. + const int sigFracTailBits = srcSigFracBits - dstSigFracBits; + if (srcExpBits == dstExpBits && + ((aRep >> sigFracTailBits) << sigFracTailBits) == aRep) { + dstExp = srcExp; + dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits); + return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac)); } - if (aAbs - underflow < aAbs - overflow) { + const int dstExpCandidate = ((int)srcExp - srcExpBias) + dstExpBias; + if (dstExpCandidate >= 1 && dstExpCandidate < dstInfExp) { // The exponent of a is within the range of normal numbers in the - // destination format. We can convert by simply right-shifting with + // destination format. We can convert by simply right-shifting with // rounding and adjusting the exponent. - absResult = aAbs >> (srcSigBits - dstSigBits); - absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; + dstExp = dstExpCandidate; + dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits); - const src_rep_t roundBits = aAbs & roundMask; + const src_rep_t roundBits = srcSigFrac & roundMask; // Round to nearest. 
if (roundBits > halfway) - absResult++; + dstSigFrac++; // Tie to even. else if (roundBits == halfway) - absResult += absResult & 1; - } else if (aAbs > srcInfinity) { + dstSigFrac += dstSigFrac & 1; + + // Rounding has changed the exponent. + if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) { + dstExp += 1; + dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits); + } + } else if (srcExp == srcInfExp && srcSigFrac) { // a is NaN. // Conjure the result by beginning with infinity, setting the qNaN // bit and inserting the (truncated) trailing NaN field. - absResult = (dst_rep_t)dstInfExp << dstSigBits; - absResult |= dstQNaN; - absResult |= - ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode; - } else if (aAbs >= overflow) { - // a overflows to infinity. - absResult = (dst_rep_t)dstInfExp << dstSigBits; + dstExp = dstInfExp; + dstSigFrac = dstQNaN; + dstSigFrac |= ((srcSigFrac & srcNaNCode) >> sigFracTailBits) & dstNaNCode; + } else if ((int)srcExp >= overflowExponent) { + dstExp = dstInfExp; + dstSigFrac = 0; } else { // a underflows on conversion to the destination type or is an exact // zero. The result may be a denormal or zero. Extract the exponent // to get the shift amount for the denormalization. - const int aExp = aAbs >> srcSigBits; - const int shift = srcExpBias - dstExpBias - aExp + 1; + src_rep_t significand = srcSigFrac; + int shift = srcExpBias - dstExpBias - srcExp; - const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal; + if (srcExp) { + // Set the implicit integer bit if the source is a normal number. + significand |= srcMinNormal; + shift += 1; + } // Right shift by the denormalization amount with sticky. 
- if (shift > srcSigBits) { - absResult = 0; + if (shift > srcSigFracBits) { + dstExp = 0; + dstSigFrac = 0; } else { - const bool sticky = (significand << (srcBits - shift)) != 0; + dstExp = 0; + const bool sticky = shift && ((significand << (srcBits - shift)) != 0); src_rep_t denormalizedSignificand = significand >> shift | sticky; - absResult = denormalizedSignificand >> (srcSigBits - dstSigBits); + dstSigFrac = denormalizedSignificand >> sigFracTailBits; const src_rep_t roundBits = denormalizedSignificand & roundMask; // Round to nearest if (roundBits > halfway) - absResult++; + dstSigFrac++; // Ties to even else if (roundBits == halfway) - absResult += absResult & 1; + dstSigFrac += dstSigFrac & 1; + + // Rounding has changed the exponent. + if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) { + dstExp += 1; + dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits); + } } } - // Apply the signbit to the absolute value. - const dst_rep_t result = absResult | sign >> (srcBits - dstBits); - return dstFromRep(result); + return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac)); } diff --git a/compiler-rt/lib/builtins/trunctfxf2.c b/compiler-rt/lib/builtins/trunctfxf2.c new file mode 100644 index 0000000000000..4a22a602b3817 --- /dev/null +++ b/compiler-rt/lib/builtins/trunctfxf2.c @@ -0,0 +1,24 @@ +//===-- lib/trunctfsf2.c - long double -> quad conversion ---------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Assumption: long double is a IEEE 80 bit floating point type padded to 128 +// bits. + +// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64. 
+#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) && \ + (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)) + +#define SRC_QUAD +#define DST_80 +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI long double __trunctfxf2(__float128 a) { + return __truncXfYf2__(a); +} + +#endif diff --git a/compiler-rt/test/builtins/Unit/addtf3_test.c b/compiler-rt/test/builtins/Unit/addtf3_test.c index fe2e2c80f655b..e6986c236a64f 100644 --- a/compiler-rt/test/builtins/Unit/addtf3_test.c +++ b/compiler-rt/test/builtins/Unit/addtf3_test.c @@ -16,7 +16,7 @@ int test__addtf3(long double a, long double b, uint64_t expectedHi, uint64_t expectedLo) { long double x = __addtf3(a, b); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = compareResultF128(x, expectedHi, expectedLo); if (ret){ printf("error in test__addtf3(%.20Lf, %.20Lf) = %.20Lf, " diff --git a/compiler-rt/test/builtins/Unit/divtf3_test.c b/compiler-rt/test/builtins/Unit/divtf3_test.c index 927d0b826f8f5..da6465636e923 100644 --- a/compiler-rt/test/builtins/Unit/divtf3_test.c +++ b/compiler-rt/test/builtins/Unit/divtf3_test.c @@ -15,7 +15,7 @@ int test__divtf3(long double a, long double b, uint64_t expectedHi, uint64_t expectedLo) { long double x = __divtf3(a, b); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = compareResultF128(x, expectedHi, expectedLo); if (ret){ printf("error in test__divtf3(%.20Le, %.20Le) = %.20Le, " diff --git a/compiler-rt/test/builtins/Unit/extenddftf2_test.c b/compiler-rt/test/builtins/Unit/extenddftf2_test.c index 04a346887661b..fcc030ca92202 100644 --- a/compiler-rt/test/builtins/Unit/extenddftf2_test.c +++ b/compiler-rt/test/builtins/Unit/extenddftf2_test.c @@ -13,7 +13,7 @@ COMPILER_RT_ABI long double __extenddftf2(double a); int test__extenddftf2(double a, uint64_t expectedHi, uint64_t expectedLo) { long double x = __extenddftf2(a); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = compareResultF128(x, expectedHi, expectedLo); if (ret){ 
printf("error in test__extenddftf2(%f) = %.20Lf, " diff --git a/compiler-rt/test/builtins/Unit/extendhftf2_test.c b/compiler-rt/test/builtins/Unit/extendhftf2_test.c index 7d3ea3049e8a1..5de17379093af 100644 --- a/compiler-rt/test/builtins/Unit/extendhftf2_test.c +++ b/compiler-rt/test/builtins/Unit/extendhftf2_test.c @@ -12,7 +12,7 @@ COMPILER_RT_ABI long double __extendhftf2(TYPE_FP16 a); int test__extendhftf2(TYPE_FP16 a, uint64_t expectedHi, uint64_t expectedLo) { long double x = __extendhftf2(a); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = compareResultF128(x, expectedHi, expectedLo); if (ret) { printf("error in test__extendhftf2(%#.4x) = %.20Lf, " diff --git a/compiler-rt/test/builtins/Unit/extendsftf2_test.c b/compiler-rt/test/builtins/Unit/extendsftf2_test.c index 19dd5b02c07bd..6ce9bd81a3dd9 100644 --- a/compiler-rt/test/builtins/Unit/extendsftf2_test.c +++ b/compiler-rt/test/builtins/Unit/extendsftf2_test.c @@ -13,7 +13,7 @@ COMPILER_RT_ABI long double __extendsftf2(float a); int test__extendsftf2(float a, uint64_t expectedHi, uint64_t expectedLo) { long double x = __extendsftf2(a); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = compareResultF128(x, expectedHi, expectedLo); if (ret) { diff --git a/compiler-rt/test/builtins/Unit/extendxftf2_test.c b/compiler-rt/test/builtins/Unit/extendxftf2_test.c new file mode 100644 index 0000000000000..f5211875438c7 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/extendxftf2_test.c @@ -0,0 +1,74 @@ +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_extendxftf2 + +#include "int_lib.h" +#include + +#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) && \ + (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)) + +#include "fp_test.h" + +COMPILER_RT_ABI __float128 __extendxftf2(long double a); + +int test__extendxftf2(long double a, uint64_t expectedHi, uint64_t expectedLo) { + __float128 x = __extendxftf2(a); + int ret = compareResultF128(x, 
expectedHi, expectedLo); + + if (ret) { + printf("error in __extendxftf2(%.20Lf) = %.20Lf, " + "expected %.20Lf\n", + a, x, fromRep128(expectedHi, expectedLo)); + } + return ret; +} + +char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0}; + +#endif + +int main() { +#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) && \ + (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)) + // qNaN + if (test__extendxftf2(makeQNaN80(), UINT64_C(0x7fff800000000000), + UINT64_C(0x0))) + return 1; + // NaN + if (test__extendxftf2(makeNaN80(UINT64_C(0x3fffffffffffffff)), + UINT64_C(0x7fff7fffffffffff), + UINT64_C(0xfffe000000000000))) + return 1; + // inf + if (test__extendxftf2(makeInf80(), UINT64_C(0x7fff000000000000), + UINT64_C(0x0))) + return 1; + // zero + if (test__extendxftf2(0.0, UINT64_C(0x0), UINT64_C(0x0))) + return 1; + if (test__extendxftf2(0x1.23456789abcdefp+5, UINT64_C(0x400423456789abcd), + UINT64_C(0xf000000000000000))) + return 1; + if (test__extendxftf2(0x1.edcba987654321fp-9, UINT64_C(0x3ff6edcba9876543), + UINT64_C(0x2000000000000000))) + return 1; + if (test__extendxftf2(0x1.23456789abcdefp+45, UINT64_C(0x402c23456789abcd), + UINT64_C(0xf000000000000000))) + return 1; + if (test__extendxftf2(0x1.edcba987654321fp-45, UINT64_C(0x3fd2edcba9876543), + UINT64_C(0x2000000000000000))) + return 1; + // denormal number + if (test__extendxftf2(1e-4932L, UINT64_C(0x00004c248f91e526), + UINT64_C(0xafe0000000000000))) + return 1; + // denormal number + if (test__extendxftf2(2e-4932L, UINT64_C(0x000098491f23ca4d), + UINT64_C(0x5fc0000000000000))) + return 1; +#else + printf("skipped\n"); + +#endif + return 0; +} diff --git a/compiler-rt/test/builtins/Unit/floatditf_test.c b/compiler-rt/test/builtins/Unit/floatditf_test.c index 4d5da32ec25d4..fe7a5fd86ae84 100644 --- a/compiler-rt/test/builtins/Unit/floatditf_test.c +++ b/compiler-rt/test/builtins/Unit/floatditf_test.c @@ -17,7 +17,7 @@ COMPILER_RT_ABI long double __floatditf(di_int a); int 
test__floatditf(di_int a, uint64_t expectedHi, uint64_t expectedLo) { long double x = __floatditf(a); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = compareResultF128(x, expectedHi, expectedLo); if (ret) printf("error in __floatditf(%Ld) = %.20Lf, " diff --git a/compiler-rt/test/builtins/Unit/floatsitf_test.c b/compiler-rt/test/builtins/Unit/floatsitf_test.c index 751a4a9b9207a..b6571b9ba223d 100644 --- a/compiler-rt/test/builtins/Unit/floatsitf_test.c +++ b/compiler-rt/test/builtins/Unit/floatsitf_test.c @@ -13,7 +13,7 @@ COMPILER_RT_ABI long double __floatsitf(si_int a); int test__floatsitf(si_int a, uint64_t expectedHi, uint64_t expectedLo) { long double x = __floatsitf(a); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = compareResultF128(x, expectedHi, expectedLo); if (ret) { diff --git a/compiler-rt/test/builtins/Unit/floatunditf_test.c b/compiler-rt/test/builtins/Unit/floatunditf_test.c index d44ae7934145a..8da78da976029 100644 --- a/compiler-rt/test/builtins/Unit/floatunditf_test.c +++ b/compiler-rt/test/builtins/Unit/floatunditf_test.c @@ -17,7 +17,7 @@ COMPILER_RT_ABI long double __floatunditf(du_int a); int test__floatunditf(du_int a, uint64_t expectedHi, uint64_t expectedLo) { long double x = __floatunditf(a); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = compareResultF128(x, expectedHi, expectedLo); if (ret) printf("error in __floatunditf(%Lu) = %.20Lf, " diff --git a/compiler-rt/test/builtins/Unit/floatunsitf_test.c b/compiler-rt/test/builtins/Unit/floatunsitf_test.c index f0a6c63eb8379..b6b1ba0457399 100644 --- a/compiler-rt/test/builtins/Unit/floatunsitf_test.c +++ b/compiler-rt/test/builtins/Unit/floatunsitf_test.c @@ -13,7 +13,7 @@ COMPILER_RT_ABI long double __floatunsitf(su_int a); int test__floatunsitf(su_int a, uint64_t expectedHi, uint64_t expectedLo) { long double x = __floatunsitf(a); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = compareResultF128(x, 
expectedHi, expectedLo); if (ret){ printf("error in test__floatunsitf(%u) = %.20Lf, " diff --git a/compiler-rt/test/builtins/Unit/fp_test.h b/compiler-rt/test/builtins/Unit/fp_test.h index e54dfc108e718..f095ae0701d77 100644 --- a/compiler-rt/test/builtins/Unit/fp_test.h +++ b/compiler-rt/test/builtins/Unit/fp_test.h @@ -9,6 +9,18 @@ #define TYPE_FP16 uint16_t #endif +// TODO: Switch to using fp_lib.h once QUAD_PRECISION is available on x86_64. +#if __LDBL_MANT_DIG__ == 113 || \ + ((__LDBL_MANT_DIG__ == 64) && defined(__x86_64__) && \ + (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))) +#if __LDBL_MANT_DIG__ == 113 +#define TYPE_FP128 long double +#else +#define TYPE_FP128 __float128 +#endif +#define TEST_COMPILER_RT_HAS_FLOAT128 +#endif + enum EXPECTED_RESULT { LESS_0, LESS_EQUAL_0, EQUAL_0, GREATER_0, GREATER_EQUAL_0, NEQUAL_0 }; @@ -38,11 +50,10 @@ static inline double fromRep64(uint64_t x) return ret; } -#if __LDBL_MANT_DIG__ == 113 -static inline long double fromRep128(uint64_t hi, uint64_t lo) -{ +#ifdef TEST_COMPILER_RT_HAS_FLOAT128 +static inline TYPE_FP128 fromRep128(uint64_t hi, uint64_t lo) { __uint128_t x = ((__uint128_t)hi << 64) + lo; - long double ret; + TYPE_FP128 ret; memcpy(&ret, &x, 16); return ret; } @@ -73,9 +84,8 @@ static inline uint64_t toRep64(double x) return ret; } -#if __LDBL_MANT_DIG__ == 113 -static inline __uint128_t toRep128(long double x) -{ +#ifdef TEST_COMPILER_RT_HAS_FLOAT128 +static inline __uint128_t toRep128(TYPE_FP128 x) { __uint128_t ret; memcpy(&ret, &x, 16); return ret; @@ -136,25 +146,23 @@ static inline int compareResultD(double result, return 1; } -#if __LDBL_MANT_DIG__ == 113 +#ifdef TEST_COMPILER_RT_HAS_FLOAT128 // return 0 if equal // use two 64-bit integers instead of one 128-bit integer // because 128-bit integer constant can't be assigned directly -static inline int compareResultLD(long double result, - uint64_t expectedHi, - uint64_t expectedLo) -{ +static inline int compareResultF128(TYPE_FP128 result, 
uint64_t expectedHi, + uint64_t expectedLo) { __uint128_t rep = toRep128(result); uint64_t hi = rep >> 64; uint64_t lo = rep; - if (hi == expectedHi && lo == expectedLo){ + if (hi == expectedHi && lo == expectedLo) { return 0; } // test other possible NaN representation(signal NaN) - else if (expectedHi == 0x7fff800000000000UL && expectedLo == 0x0UL){ + else if (expectedHi == 0x7fff800000000000UL && expectedLo == 0x0UL) { if ((hi & 0x7fff000000000000UL) == 0x7fff000000000000UL && - ((hi & 0xffffffffffffUL) > 0 || lo > 0)){ + ((hi & 0xffffffffffffUL) > 0 || lo > 0)) { return 0; } } @@ -232,9 +240,45 @@ static inline double makeQNaN64(void) return fromRep64(0x7ff8000000000000UL); } -#if __LDBL_MANT_DIG__ == 113 -static inline long double makeQNaN128(void) -{ +#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) +static inline long double F80FromRep128(uint64_t hi, uint64_t lo) { + __uint128_t x = ((__uint128_t)hi << 64) + lo; + long double ret; + memcpy(&ret, &x, 16); + return ret; +} + +static inline __uint128_t F80ToRep128(long double x) { + __uint128_t ret; + memcpy(&ret, &x, 16); + return ret; +} + +static inline int compareResultF80(long double result, uint64_t expectedHi, + uint64_t expectedLo) { + __uint128_t rep = F80ToRep128(result); + // F80 occupies the lower 80 bits of __uint128_t. 
+ uint64_t hi = (rep >> 64) & ((1UL << (80 - 64)) - 1); + uint64_t lo = rep; + return !(hi == expectedHi && lo == expectedLo); +} + +static inline long double makeQNaN80(void) { + return F80FromRep128(0x7fffUL, 0xc000000000000000UL); +} + +static inline long double makeNaN80(uint64_t rand) { + return F80FromRep128(0x7fffUL, + 0x8000000000000000 | (rand & 0x3fffffffffffffff)); +} + +static inline long double makeInf80(void) { + return F80FromRep128(0x7fffUL, 0x8000000000000000UL); +} +#endif + +#ifdef TEST_COMPILER_RT_HAS_FLOAT128 +static inline TYPE_FP128 makeQNaN128(void) { return fromRep128(0x7fff800000000000UL, 0x0UL); } #endif @@ -254,9 +298,8 @@ static inline double makeNaN64(uint64_t rand) return fromRep64(0x7ff0000000000000UL | (rand & 0xfffffffffffffUL)); } -#if __LDBL_MANT_DIG__ == 113 -static inline long double makeNaN128(uint64_t rand) -{ +#ifdef TEST_COMPILER_RT_HAS_FLOAT128 +static inline TYPE_FP128 makeNaN128(uint64_t rand) { return fromRep128(0x7fff000000000000UL | (rand & 0xffffffffffffUL), 0x0UL); } #endif @@ -286,14 +329,12 @@ static inline double makeNegativeInf64(void) return fromRep64(0xfff0000000000000UL); } -#if __LDBL_MANT_DIG__ == 113 -static inline long double makeInf128(void) -{ +#ifdef TEST_COMPILER_RT_HAS_FLOAT128 +static inline TYPE_FP128 makeInf128(void) { return fromRep128(0x7fff000000000000UL, 0x0UL); } -static inline long double makeNegativeInf128(void) -{ +static inline TYPE_FP128 makeNegativeInf128(void) { return fromRep128(0xffff000000000000UL, 0x0UL); } #endif diff --git a/compiler-rt/test/builtins/Unit/multf3_test.c b/compiler-rt/test/builtins/Unit/multf3_test.c index 3bf6ab24cec02..543b55899ce82 100644 --- a/compiler-rt/test/builtins/Unit/multf3_test.c +++ b/compiler-rt/test/builtins/Unit/multf3_test.c @@ -15,7 +15,7 @@ int test__multf3(long double a, long double b, uint64_t expectedHi, uint64_t expectedLo) { long double x = __multf3(a, b); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = 
compareResultF128(x, expectedHi, expectedLo); if (ret){ printf("error in test__multf3(%.20Lf, %.20Lf) = %.20Lf, " diff --git a/compiler-rt/test/builtins/Unit/subtf3_test.c b/compiler-rt/test/builtins/Unit/subtf3_test.c index 377ae95a9a7d7..724fa4820d99d 100644 --- a/compiler-rt/test/builtins/Unit/subtf3_test.c +++ b/compiler-rt/test/builtins/Unit/subtf3_test.c @@ -16,7 +16,7 @@ int test__subtf3(long double a, long double b, uint64_t expectedHi, uint64_t expectedLo) { long double x = __subtf3(a, b); - int ret = compareResultLD(x, expectedHi, expectedLo); + int ret = compareResultF128(x, expectedHi, expectedLo); if (ret){ printf("error in test__subtf3(%.20Lf, %.20Lf) = %.20Lf, " diff --git a/compiler-rt/test/builtins/Unit/trunctfxf2_test.c b/compiler-rt/test/builtins/Unit/trunctfxf2_test.c new file mode 100644 index 0000000000000..53024ef139624 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/trunctfxf2_test.c @@ -0,0 +1,97 @@ +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_trunctfxf2 + +#include "int_lib.h" +#include + +#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) && \ + (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)) + +#include "fp_test.h" + +COMPILER_RT_ABI long double __trunctfxf2(__float128 a); + +int test__trunctfxf2(__float128 a, uint64_t expectedHi, uint64_t expectedLo) { + long double x = __trunctfxf2(a); + int ret = compareResultF80(x, expectedHi, expectedLo); + ; + if (ret) { + printf("error in __trunctfxf2(%.20Lf) = %.20Lf, " + "expected %.20Lf\n", + a, x, fromRep128(expectedHi, expectedLo)); + } + return ret; +} + +char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0}; + +#endif + +int main() { +#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) && \ + (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)) + // qNaN + if (test__trunctfxf2(makeQNaN128(), UINT64_C(0x7FFF), + UINT64_C(0xC000000000000000))) + return 1; + // NaN + if (test__trunctfxf2(makeNaN128(UINT64_C(0x810000000000)), 
UINT64_C(0x7FFF), + UINT64_C(0xC080000000000000))) + return 1; + // inf + if (test__trunctfxf2(makeInf128(), UINT64_C(0x7FFF), + UINT64_C(0x8000000000000000))) + return 1; + // zero + if (test__trunctfxf2(0.0Q, UINT64_C(0x0), UINT64_C(0x0))) + return 1; + if (test__trunctfxf2(0x1.af23456789bbaaab347645365cdep+5L, UINT64_C(0x4004), + UINT64_C(0xd791a2b3c4ddd556))) + return 1; + if (test__trunctfxf2(0x1.dedafcff354b6ae9758763545432p-9L, UINT64_C(0x3ff6), + UINT64_C(0xef6d7e7f9aa5b575))) + return 1; + if (test__trunctfxf2(0x1.2f34dd5f437e849b4baab754cdefp+4534L, + UINT64_C(0x51b5), UINT64_C(0x979a6eafa1bf424e))) + return 1; + if (test__trunctfxf2(0x1.edcbff8ad76ab5bf46463233214fp-435L, UINT64_C(0x3e4c), + UINT64_C(0xf6e5ffc56bb55ae0))) + return 1; + + // Test rounding near halfway. + __float128 halfwayPlus = + fromRep128(UINT64_C(0x7ffa000000000000), + ((UINT64_C(1) << (112 - 63 - 1)) + UINT64_C(1))); + if (test__trunctfxf2(halfwayPlus, UINT64_C(0x7ffa), + UINT64_C(0x8000000000000001))) + return 1; + __float128 halfwayExactOdd = fromRep128( + UINT64_C(0x7ffa000000000000), + ((UINT64_C(1) << (112 - 63)) + (UINT64_C(1) << (112 - 63 - 1)))); + if (test__trunctfxf2(halfwayExactOdd, UINT64_C(0x7ffa), + UINT64_C(0x8000000000000002))) + return 1; + __float128 halfwayExactEven = + fromRep128(UINT64_C(0x7ffa000000000000), (UINT64_C(1) << (112 - 63 - 1))); + if (test__trunctfxf2(halfwayExactEven, UINT64_C(0x7ffa), + UINT64_C(0x8000000000000000))) + return 1; + __float128 halfwayRoundingWillChangeExponent = + fromRep128(UINT64_C(0x7ffaffffffffffff), UINT64_C(0xffff000000000001)); + if (test__trunctfxf2(halfwayRoundingWillChangeExponent, UINT64_C(0x7ffb), + UINT64_C(0x8000000000000000))) + return 1; + + // denormal number + if (test__trunctfxf2(1e-4932Q, UINT64_C(0), UINT64_C(0x261247c8f29357f0))) + return 1; + // denormal number + if (test__trunctfxf2(2e-4932Q, UINT64_C(0), UINT64_C(0x4c248f91e526afe0))) + return 1; + +#else + printf("skipped\n"); + +#endif + return 0; +} From 
78be6b22347e9900ad6aef0664161be60dbe8ced Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 16 Oct 2023 16:24:07 -0700 Subject: [PATCH 282/720] llvm-gsymutil now handles empty linkage names correctly. (#68931) Previous to this fix, if we had a DW_TAG_subprogram that had a DW_AT_linkage_name that was empty, it would attempt to use this name which would cause an error to be emitted when saving the gsym file to disk: error: DWARF conversion failed: : attempted to encode invalid FunctionInfo object This patch fixes this issue and adds a unit test case. --- llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 10 +- llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp | 152 +++++++++++++++++++ 2 files changed, 157 insertions(+), 5 deletions(-) diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index e38347f15e3ae..d720c1e334955 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -132,11 +132,11 @@ static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { static std::optional getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) { // If the dwarf has mangled name, use mangled name - if (auto LinkageName = - dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_AT_linkage_name}), - nullptr)) - return Gsym.insertString(LinkageName, /* Copy */ false); + if (auto LinkageName = Die.getLinkageName()) { + // We have seen cases were linkage name is actually empty. 
+ if (strlen(LinkageName) > 0) + return Gsym.insertString(LinkageName, /* Copy */ false); + } StringRef ShortName(Die.getName(DINameKind::ShortName)); if (ShortName.empty()) diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp index 58bc83997d1a9..ad81a2fcd1644 100644 --- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp +++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -4005,3 +4005,155 @@ TEST(GSYMTest, TestEmptyRangeWarnings) { // Make sure we don't see spurious errors in the output: EXPECT_TRUE(errors.find("error:") == std::string::npos); } + + +TEST(GSYMTest, TestEmptyLinkageName) { + // This example has a single compile unit that has a DW_TAG_subprogram that + // has a function that has an empty linkage name and a valid normal name. + // Previously this would cause an encoding error: + // + // DWARF conversion failed: attempted to encode invalid FunctionInfo object + // + // This was because we would get a valid but empty linkage name and we would + // try to use this in the GSYM FunctionInfo and that would cause the error + // as the name was empty. 
+ // + // 0x0000000b: DW_TAG_compile_unit + // DW_AT_name ("/tmp/main.cpp") + // DW_AT_language (DW_LANG_C) + // DW_AT_stmt_list (0x00000000) + // + // 0x00000015: DW_TAG_subprogram + // DW_AT_name ("foo") + // DW_AT_linkage_name ("") + // DW_AT_low_pc (0x0000000000001000) + // DW_AT_high_pc (0x0000000000001050) + // + // 0x0000002e: NULL + + + StringRef yamldata = R"( + debug_str: + - '' + - '/tmp/main.cpp' + - foo + - '' + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_language + Form: DW_FORM_udata + - Attribute: DW_AT_stmt_list + Form: DW_FORM_sec_offset + - Code: 0x2 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_linkage_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_addr + debug_info: + - Length: 0x2B + Version: 4 + AbbrevTableID: 0 + AbbrOffset: 0x0 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + Values: + - Value: 0x1 + - Value: 0x2 + - Value: 0x0 + - AbbrCode: 0x2 + Values: + - Value: 0xF + - Value: 0x13 + - Value: 0x1000 + - Value: 0x1050 + - AbbrCode: 0x0 + debug_line: + - Length: 68 + Version: 2 + PrologueLength: 36 + MinInstLength: 1 + DefaultIsStmt: 1 + LineBase: 251 + LineRange: 14 + OpcodeBase: 13 + StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ] + IncludeDirs: + - '/tmp' + Files: + - Name: main.cpp + DirIdx: 1 + ModTime: 0 + Length: 0 + Opcodes: + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 4096 + - Opcode: DW_LNS_advance_line + SData: 9 + Data: 0 + - Opcode: DW_LNS_copy + Data: 0 + - Opcode: DW_LNS_advance_pc + Data: 256 + - Opcode: DW_LNS_advance_line + SData: 1 + Data: 0 + - Opcode: DW_LNS_copy + Data: 0 + - Opcode: DW_LNS_advance_pc + Data: 256 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: 
DW_LNE_end_sequence + Data: 0 + )"; + auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + ASSERT_TRUE(DwarfContext.get() != nullptr); + std::string errors; + raw_string_ostream OS(errors); + GsymCreator GC; + DwarfTransformer DT(*DwarfContext, GC); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount, &OS), Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded()); + OS.flush(); + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + const auto ByteOrder = llvm::endianness::native; + FileWriter FW(OutStrm, ByteOrder); + ASSERT_THAT_ERROR(GC.encode(FW), Succeeded()); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_THAT_EXPECTED(GR, Succeeded()); + // There should be one function in our GSYM. + EXPECT_EQ(GR->getNumAddresses(), 1u); + // Verify "foo" is present and has a line table and no inline info. + auto ExpFI = GR->getFunctionInfo(0x1000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x1050)); + EXPECT_TRUE(ExpFI->OptLineTable.has_value()); + EXPECT_FALSE(ExpFI->Inline.has_value()); + StringRef FuncName = GR->getString(ExpFI->Name); + EXPECT_EQ(FuncName, "foo"); + + // Make sure we don't see spurious errors in the output: + EXPECT_TRUE(errors.find("error:") == std::string::npos); +} From d343529d0bd035c515fc6aa5bad5750f262b3345 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 16:26:06 -0700 Subject: [PATCH 283/720] [flang] Fix CFI_CDESC_T for C++ interoperability (#67568) Full namespace qualification is needed on an identifier. 
--- flang/include/flang/ISO_Fortran_binding.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/include/flang/ISO_Fortran_binding.h b/flang/include/flang/ISO_Fortran_binding.h index 2893fd46c267d..51d6219427cce 100644 --- a/flang/include/flang/ISO_Fortran_binding.h +++ b/flang/include/flang/ISO_Fortran_binding.h @@ -169,7 +169,7 @@ template struct CdescStorage : public CFI_cdesc_t { template <> struct CdescStorage<1> : public CFI_cdesc_t {}; template <> struct CdescStorage<0> : public CFI_cdesc_t {}; } // namespace cfi_internal -#define CFI_CDESC_T(rank) cfi_internal::CdescStorage +#define CFI_CDESC_T(rank) ::Fortran::ISO::cfi_internal::CdescStorage #else #define CFI_CDESC_T(_RANK) \ struct { \ @@ -200,8 +200,8 @@ RT_API_ATTRS int CFI_setpointer( #ifdef __cplusplus } // extern "C" } // inline namespace Fortran_2018 -} -} +} // namespace ISO +} // namespace Fortran #endif #endif /* CFI_ISO_FORTRAN_BINDING_H_ */ From 2565f9f49b79e11ab613f125cb4a8daa87f4bab6 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 16:36:46 -0700 Subject: [PATCH 284/720] [flang] Remove IEEE_DENORM from IEEE_ALL (#67573) The array of all exceptions IEEE_ALL defined in the intrinsic module IEEE_EXCEPTIONS should contain only what the standard mandates. Existing code depends on it having only five elements. The legacy extension exception flag IEEE_DENORM shouldn't be an element. --- flang/module/__fortran_ieee_exceptions.f90 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang/module/__fortran_ieee_exceptions.f90 index 77dc6f8551786..785c4adaec25d 100644 --- a/flang/module/__fortran_ieee_exceptions.f90 +++ b/flang/module/__fortran_ieee_exceptions.f90 @@ -27,10 +27,8 @@ ieee_denorm = ieee_flag_type(32) ! 
PGI extension type(ieee_flag_type), parameter :: & - ieee_usual(*) = [ & - ieee_overflow, ieee_divide_by_zero, ieee_invalid ], & - ieee_all(*) = [ & - ieee_usual, ieee_underflow, ieee_inexact, ieee_denorm ] + ieee_usual(*) = [ ieee_overflow, ieee_divide_by_zero, ieee_invalid ], & + ieee_all(*) = [ ieee_usual, ieee_underflow, ieee_inexact ] type :: ieee_modes_type ! Fortran 2018, 17.7 private From 30ca258614dd231e23f45ad1188905acadb86e66 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Sun, 15 Oct 2023 08:49:46 -0700 Subject: [PATCH 285/720] [RISCV] Pre-commit concat-vectors-constant-stride.ll This patch commits tests that can be optimized by improving performCONCAT_VECTORCombine to do a better job at decomposing the base pointer and recognizing a constant offset. --- .../rvv/concat-vectors-constant-stride.ll | 231 ++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll new file mode 100644 index 0000000000000..611270ab98ebd --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll @@ -0,0 +1,231 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v,+unaligned-vector-mem -target-abi=ilp32 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+unaligned-vector-mem -target-abi=lp64 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +define void @constant_forward_stride(ptr %s, ptr %d) { +; CHECK-LABEL: constant_forward_stride: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a0, 16 +; CHECK-NEXT: addi a3, a0, 32 +; CHECK-NEXT: addi a4, a0, 48 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a2) +; CHECK-NEXT: vle8.v 
v10, (a3) +; CHECK-NEXT: vle8.v v11, (a4) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 6 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret + %1 = getelementptr inbounds i8, ptr %s, i64 16 + %2 = getelementptr inbounds i8, ptr %s, i64 32 + %3 = getelementptr inbounds i8, ptr %s, i64 48 + %4 = load <2 x i8>, ptr %s, align 1 + %5 = load <2 x i8>, ptr %1, align 1 + %6 = load <2 x i8>, ptr %2, align 1 + %7 = load <2 x i8>, ptr %3, align 1 + %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> + %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> + %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> + store <8 x i8> %10, ptr %d, align 1 + ret void +} + +define void @constant_forward_stride2(ptr %s, ptr %d) { +; CHECK-LABEL: constant_forward_stride2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: addi a3, a0, -32 +; CHECK-NEXT: addi a4, a0, -48 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a4) +; CHECK-NEXT: vle8.v v9, (a3) +; CHECK-NEXT: vle8.v v10, (a2) +; CHECK-NEXT: vle8.v v11, (a0) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 6 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret + %1 = getelementptr inbounds i8, ptr %s, i64 -16 + %2 = getelementptr inbounds i8, ptr %s, i64 -32 + %3 = getelementptr inbounds i8, ptr %s, i64 -48 + %4 = load <2 x i8>, ptr %3, align 1 + %5 = load <2 x i8>, ptr %2, align 1 + %6 = load <2 x i8>, ptr %1, align 1 + %7 = load <2 x i8>, ptr %s, align 1 + %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> + %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> + %10 = 
shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> + store <8 x i8> %10, ptr %d, align 1 + ret void +} + +define void @constant_forward_stride3(ptr %s, ptr %d) { +; CHECK-LABEL: constant_forward_stride3: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a0, 16 +; CHECK-NEXT: addi a3, a0, 32 +; CHECK-NEXT: addi a4, a0, 48 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a2) +; CHECK-NEXT: vle8.v v10, (a3) +; CHECK-NEXT: vle8.v v11, (a4) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 6 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret + %1 = getelementptr inbounds i8, ptr %s, i64 16 + %2 = getelementptr inbounds i8, ptr %s, i64 32 + %3 = getelementptr inbounds i8, ptr %s, i64 48 + %4 = getelementptr inbounds i8, ptr %1, i64 0 + %5 = getelementptr inbounds i8, ptr %2, i64 0 + %6 = getelementptr inbounds i8, ptr %3, i64 0 + %7 = load <2 x i8>, ptr %s, align 1 + %8 = load <2 x i8>, ptr %4, align 1 + %9 = load <2 x i8>, ptr %5, align 1 + %10 = load <2 x i8>, ptr %6, align 1 + %11 = shufflevector <2 x i8> %7, <2 x i8> %8, <4 x i32> + %12 = shufflevector <2 x i8> %9, <2 x i8> %10, <4 x i32> + %13 = shufflevector <4 x i8> %11, <4 x i8> %12, <8 x i32> + store <8 x i8> %13, ptr %d, align 1 + ret void +} + +define void @constant_back_stride(ptr %s, ptr %d) { +; CHECK-LABEL: constant_back_stride: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: addi a3, a0, -32 +; CHECK-NEXT: addi a4, a0, -48 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a2) +; CHECK-NEXT: vle8.v v10, (a3) +; CHECK-NEXT: vle8.v v11, (a4) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; 
CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 6 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret + %1 = getelementptr inbounds i8, ptr %s, i64 -16 + %2 = getelementptr inbounds i8, ptr %s, i64 -32 + %3 = getelementptr inbounds i8, ptr %s, i64 -48 + %4 = load <2 x i8>, ptr %s, align 1 + %5 = load <2 x i8>, ptr %1, align 1 + %6 = load <2 x i8>, ptr %2, align 1 + %7 = load <2 x i8>, ptr %3, align 1 + %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> + %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> + %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> + store <8 x i8> %10, ptr %d, align 1 + ret void +} + +define void @constant_back_stride2(ptr %s, ptr %d) { +; CHECK-LABEL: constant_back_stride2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a0, 16 +; CHECK-NEXT: addi a3, a0, 32 +; CHECK-NEXT: addi a4, a0, 48 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a4) +; CHECK-NEXT: vle8.v v9, (a3) +; CHECK-NEXT: vle8.v v10, (a2) +; CHECK-NEXT: vle8.v v11, (a0) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 6 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret + %1 = getelementptr inbounds i8, ptr %s, i64 16 + %2 = getelementptr inbounds i8, ptr %s, i64 32 + %3 = getelementptr inbounds i8, ptr %s, i64 48 + %4 = load <2 x i8>, ptr %3, align 1 + %5 = load <2 x i8>, ptr %2, align 1 + %6 = load <2 x i8>, ptr %1, align 1 + %7 = load <2 x i8>, ptr %s, align 1 + %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> + %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> + %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> + store <8 x i8> %10, ptr %d, align 1 + ret void +} + +define void @constant_back_stride3(ptr %s, ptr %d) { +; CHECK-LABEL: 
constant_back_stride3: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: addi a3, a0, -32 +; CHECK-NEXT: addi a4, a0, -48 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a2) +; CHECK-NEXT: vle8.v v10, (a3) +; CHECK-NEXT: vle8.v v11, (a4) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 6 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret + %1 = getelementptr inbounds i8, ptr %s, i64 -16 + %2 = getelementptr inbounds i8, ptr %s, i64 -32 + %3 = getelementptr inbounds i8, ptr %s, i64 -48 + %4 = getelementptr inbounds i8, ptr %1, i64 0 + %5 = getelementptr inbounds i8, ptr %2, i64 0 + %6 = getelementptr inbounds i8, ptr %3, i64 0 + %7 = load <2 x i8>, ptr %s, align 1 + %8 = load <2 x i8>, ptr %4, align 1 + %9 = load <2 x i8>, ptr %5, align 1 + %10 = load <2 x i8>, ptr %6, align 1 + %11 = shufflevector <2 x i8> %7, <2 x i8> %8, <4 x i32> + %12 = shufflevector <2 x i8> %9, <2 x i8> %10, <4 x i32> + %13 = shufflevector <4 x i8> %11, <4 x i8> %12, <8 x i32> + store <8 x i8> %13, ptr %d, align 1 + ret void +} + +define void @constant_zero_stride(ptr %s, ptr %d) { +; CHECK-LABEL: constant_zero_stride: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vslideup.vi v9, v8, 2 +; CHECK-NEXT: vse8.v v9, (a1) +; CHECK-NEXT: ret + %1 = getelementptr inbounds i8, ptr %s, i64 0 + %2 = load <2 x i8>, ptr %s, align 1 + %3 = load <2 x i8>, ptr %1, align 1 + %4 = shufflevector <2 x i8> %2, <2 x i8> %3, <4 x i32> + store <4 x i8> %4, ptr %d, align 1 + ret void +} + +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} From c319c741463a039c2323825b149df70cbe535c67 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Sun, 15 Oct 2023 09:00:04 -0700 Subject: [PATCH 286/720] [RISCV] Improve performCONCAT_VECTORCombine stride matching If the load ptrs can be decomposed into a common (Base + Index) with a common constant stride, then return the constant stride. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 21 +++- .../rvv/concat-vectors-constant-stride.ll | 116 ++++-------------- 2 files changed, 43 insertions(+), 94 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6eb253cc51466..4dc3f6137e306 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -13803,9 +13804,17 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, Align = std::min(Align, Ld->getAlign()); } - using PtrDiff = std::pair; - auto GetPtrDiff = [](LoadSDNode *Ld1, - LoadSDNode *Ld2) -> std::optional { + using PtrDiff = std::pair, bool>; + auto GetPtrDiff = [&DAG](LoadSDNode *Ld1, + LoadSDNode *Ld2) -> std::optional { + // If the load ptrs can be decomposed into a common (Base + Index) with a + // common constant stride, then return the constant stride. 
+ BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG); + BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG); + if (BIO1.equalBaseIndex(BIO2, DAG)) + return {{BIO2.getOffset() - BIO1.getOffset(), false}}; + + // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride) SDValue P1 = Ld1->getBasePtr(); SDValue P2 = Ld2->getBasePtr(); if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1) @@ -13844,7 +13853,11 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, if (!TLI.isLegalStridedLoadStore(WideVecVT, Align)) return SDValue(); - auto [Stride, MustNegateStride] = *BaseDiff; + auto [StrideVariant, MustNegateStride] = *BaseDiff; + SDValue Stride = std::holds_alternative(StrideVariant) + ? std::get(StrideVariant) + : DAG.getConstant(std::get(StrideVariant), DL, + Lds[0]->getOffset().getValueType()); if (MustNegateStride) Stride = DAG.getNegative(Stride, DL, Stride.getValueType()); diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll index 611270ab98ebd..ff35043dbd7e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll +++ b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll @@ -7,21 +7,10 @@ define void @constant_forward_stride(ptr %s, ptr %d) { ; CHECK-LABEL: constant_forward_stride: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, 16 -; CHECK-NEXT: addi a3, a0, 32 -; CHECK-NEXT: addi a4, a0, 48 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vle8.v v9, (a2) -; CHECK-NEXT: vle8.v v10, (a3) -; CHECK-NEXT: vle8.v v11, (a4) -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v11, 6 -; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 
4, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a2 +; CHECK-NEXT: vse16.v v8, (a1) ; CHECK-NEXT: ret %1 = getelementptr inbounds i8, ptr %s, i64 16 %2 = getelementptr inbounds i8, ptr %s, i64 32 @@ -40,21 +29,11 @@ define void @constant_forward_stride(ptr %s, ptr %d) { define void @constant_forward_stride2(ptr %s, ptr %d) { ; CHECK-LABEL: constant_forward_stride2: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: addi a3, a0, -32 -; CHECK-NEXT: addi a4, a0, -48 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vle8.v v8, (a4) -; CHECK-NEXT: vle8.v v9, (a3) -; CHECK-NEXT: vle8.v v10, (a2) -; CHECK-NEXT: vle8.v v11, (a0) -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v11, 6 -; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: addi a0, a0, -48 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a2 +; CHECK-NEXT: vse16.v v8, (a1) ; CHECK-NEXT: ret %1 = getelementptr inbounds i8, ptr %s, i64 -16 %2 = getelementptr inbounds i8, ptr %s, i64 -32 @@ -73,21 +52,10 @@ define void @constant_forward_stride2(ptr %s, ptr %d) { define void @constant_forward_stride3(ptr %s, ptr %d) { ; CHECK-LABEL: constant_forward_stride3: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, 16 -; CHECK-NEXT: addi a3, a0, 32 -; CHECK-NEXT: addi a4, a0, 48 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vle8.v v9, (a2) -; CHECK-NEXT: vle8.v v10, (a3) -; CHECK-NEXT: vle8.v v11, (a4) -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v11, 6 -; CHECK-NEXT: vse8.v v8, (a1) +; 
CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a2 +; CHECK-NEXT: vse16.v v8, (a1) ; CHECK-NEXT: ret %1 = getelementptr inbounds i8, ptr %s, i64 16 %2 = getelementptr inbounds i8, ptr %s, i64 32 @@ -109,21 +77,10 @@ define void @constant_forward_stride3(ptr %s, ptr %d) { define void @constant_back_stride(ptr %s, ptr %d) { ; CHECK-LABEL: constant_back_stride: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: addi a3, a0, -32 -; CHECK-NEXT: addi a4, a0, -48 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vle8.v v9, (a2) -; CHECK-NEXT: vle8.v v10, (a3) -; CHECK-NEXT: vle8.v v11, (a4) -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v11, 6 -; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: li a2, -16 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a2 +; CHECK-NEXT: vse16.v v8, (a1) ; CHECK-NEXT: ret %1 = getelementptr inbounds i8, ptr %s, i64 -16 %2 = getelementptr inbounds i8, ptr %s, i64 -32 @@ -142,21 +99,11 @@ define void @constant_back_stride(ptr %s, ptr %d) { define void @constant_back_stride2(ptr %s, ptr %d) { ; CHECK-LABEL: constant_back_stride2: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, 16 -; CHECK-NEXT: addi a3, a0, 32 -; CHECK-NEXT: addi a4, a0, 48 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vle8.v v8, (a4) -; CHECK-NEXT: vle8.v v9, (a3) -; CHECK-NEXT: vle8.v v10, (a2) -; CHECK-NEXT: vle8.v v11, (a0) -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v11, 6 -; CHECK-NEXT: vse8.v v8, (a1) 
+; CHECK-NEXT: addi a0, a0, 48 +; CHECK-NEXT: li a2, -16 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a2 +; CHECK-NEXT: vse16.v v8, (a1) ; CHECK-NEXT: ret %1 = getelementptr inbounds i8, ptr %s, i64 16 %2 = getelementptr inbounds i8, ptr %s, i64 32 @@ -175,21 +122,10 @@ define void @constant_back_stride2(ptr %s, ptr %d) { define void @constant_back_stride3(ptr %s, ptr %d) { ; CHECK-LABEL: constant_back_stride3: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: addi a3, a0, -32 -; CHECK-NEXT: addi a4, a0, -48 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vle8.v v9, (a2) -; CHECK-NEXT: vle8.v v10, (a3) -; CHECK-NEXT: vle8.v v11, (a4) -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v11, 6 -; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: li a2, -16 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a2 +; CHECK-NEXT: vse16.v v8, (a1) ; CHECK-NEXT: ret %1 = getelementptr inbounds i8, ptr %s, i64 -16 %2 = getelementptr inbounds i8, ptr %s, i64 -32 From d85f5a621667018e33eae274e05cbe8ffe9f4cc5 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Mon, 16 Oct 2023 23:46:58 +0000 Subject: [PATCH 287/720] [compiler-rt] Fix build of builtins on Windows Fix Windows build after 910a4bf5b70ae14e (the breakage was found by the buildbot https://lab.llvm.org/buildbot/#/builders/127/builds/56796) --- compiler-rt/lib/builtins/fp_extend.h | 21 ++++++++++++++------- compiler-rt/lib/builtins/fp_trunc.h | 24 ++++++++++++++++-------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/compiler-rt/lib/builtins/fp_extend.h b/compiler-rt/lib/builtins/fp_extend.h index 86b32be12d55f..d640bdcb0ec1f 100644 --- 
a/compiler-rt/lib/builtins/fp_extend.h +++ b/compiler-rt/lib/builtins/fp_extend.h @@ -23,7 +23,8 @@ typedef uint32_t src_rep_t; static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 23; // -1 accounts for the sign bit. -static const int srcExpBits = srcBits - srcSigFracBits - 1; +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 8; #define src_rep_t_clz clzsi #elif defined SRC_DOUBLE @@ -33,7 +34,8 @@ typedef uint64_t src_rep_t; static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 52; // -1 accounts for the sign bit. -static const int srcExpBits = srcBits - srcSigFracBits - 1; +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 11; static inline int src_rep_t_clz_impl(src_rep_t a) { #if defined __LP64__ @@ -56,7 +58,8 @@ static const int srcBits = 80; static const int srcSigFracBits = 63; // -1 accounts for the sign bit. // -1 accounts for the explicitly stored integer bit. -static const int srcExpBits = srcBits - srcSigFracBits - 1 - 1; +// srcBits - srcSigFracBits - 1 - 1 +static const int srcExpBits = 15; #elif defined SRC_HALF #ifdef COMPILER_RT_HAS_FLOAT16 @@ -69,7 +72,8 @@ typedef uint16_t src_rep_t; static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 10; // -1 accounts for the sign bit. -static const int srcExpBits = srcBits - srcSigFracBits - 1; +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 5; #define src_rep_t_clz __builtin_clz @@ -84,7 +88,8 @@ typedef uint32_t dst_rep_t; static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 23; // -1 accounts for the sign bit. 
-static const int dstExpBits = dstBits - dstSigFracBits - 1; +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 8; #elif defined DST_DOUBLE typedef double dst_t; @@ -93,7 +98,8 @@ typedef uint64_t dst_rep_t; static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 52; // -1 accounts for the sign bit. -static const int dstExpBits = dstBits - dstSigFracBits - 1; +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 11; #elif defined DST_QUAD // TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64. @@ -108,7 +114,8 @@ typedef __uint128_t dst_rep_t; static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 112; // -1 accounts for the sign bit. -static const int dstExpBits = dstBits - dstSigFracBits - 1; +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 15; #else #error Destination should be single, double, or quad precision! diff --git a/compiler-rt/lib/builtins/fp_trunc.h b/compiler-rt/lib/builtins/fp_trunc.h index ea13dc2efae54..f62f8bafc7995 100644 --- a/compiler-rt/lib/builtins/fp_trunc.h +++ b/compiler-rt/lib/builtins/fp_trunc.h @@ -22,7 +22,8 @@ typedef uint32_t src_rep_t; static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 23; // -1 accounts for the sign bit. -static const int srcExpBits = srcBits - srcSigFracBits - 1; +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 8; #elif defined SRC_DOUBLE typedef double src_t; @@ -31,7 +32,8 @@ typedef uint64_t src_rep_t; static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 52; // -1 accounts for the sign bit. -static const int srcExpBits = srcBits - srcSigFracBits - 1; +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 11; #elif defined SRC_QUAD // TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64. 
@@ -46,7 +48,8 @@ typedef __uint128_t src_rep_t; static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 112; // -1 accounts for the sign bit. -static const int srcExpBits = srcBits - srcSigFracBits - 1; +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 15; #else #error Source should be double precision or quad precision! @@ -59,7 +62,8 @@ typedef uint64_t dst_rep_t; static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 52; // -1 accounts for the sign bit. -static const int dstExpBits = dstBits - dstSigFracBits - 1; +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 11; #elif defined DST_80 typedef long double dst_t; @@ -69,7 +73,8 @@ static const int dstBits = 80; static const int dstSigFracBits = 63; // -1 accounts for the sign bit. // -1 accounts for the explicitly stored integer bit. -static const int dstExpBits = dstBits - dstSigFracBits - 1 - 1; +// dstBits - dstSigFracBits - 1 - 1 +static const int dstExpBits = 15; #elif defined DST_SINGLE typedef float dst_t; @@ -78,7 +83,8 @@ typedef uint32_t dst_rep_t; static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 23; // -1 accounts for the sign bit. -static const int dstExpBits = dstBits - dstSigFracBits - 1; +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 8; #elif defined DST_HALF #ifdef COMPILER_RT_HAS_FLOAT16 @@ -91,7 +97,8 @@ typedef uint16_t dst_rep_t; static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 10; // -1 accounts for the sign bit. -static const int dstExpBits = dstBits - dstSigFracBits - 1; +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 5; #elif defined DST_BFLOAT typedef __bf16 dst_t; @@ -100,7 +107,8 @@ typedef uint16_t dst_rep_t; static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 7; // -1 accounts for the sign bit. 
-static const int dstExpBits = dstBits - dstSigFracBits - 1; +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 8; #else #error Destination should be single precision or double precision! From 28a686a704fab6631d18160e5f8ee2e07620ebe1 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 16:51:46 -0700 Subject: [PATCH 288/720] [flang][NFC] Speed up large DATA statement initializations (#67585) To ensure that the map from symbols to their initial images has an entry for a particular symbol, use std::map<>::find() before std::map<>::emplace() to avoid needless memory allocation and deallocation. Also, combine adjacent intervals in the lists of initialized ranges so that contiguous initializations don't require long lists. Fixes https://github.com/llvm/llvm-project/issues/66452. --- flang/lib/Semantics/data-to-inits.cpp | 29 +++++++++++++++------------ flang/lib/Semantics/data-to-inits.h | 16 +++++++++++++++ 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/flang/lib/Semantics/data-to-inits.cpp b/flang/lib/Semantics/data-to-inits.cpp index bc0355a2c597a..85bce874e78cd 100644 --- a/flang/lib/Semantics/data-to-inits.cpp +++ b/flang/lib/Semantics/data-to-inits.cpp @@ -81,7 +81,7 @@ template class ValueListIterator { }; template void ValueListIterator::SetRepetitionCount() { - for (repetitionsRemaining_ = 1; at_ != end_; ++at_) { + for (; at_ != end_; ++at_) { auto repetitions{GetValue().repetitions}; if (repetitions < 0) { hasFatalError_ = true; @@ -335,10 +335,15 @@ bool DataInitializationCompiler::InitElement( } }}; const auto GetImage{[&]() -> evaluate::InitialImage & { - auto iter{inits_.emplace(&symbol, symbol.size())}; - auto &symbolInit{iter.first->second}; - symbolInit.initializedRanges.emplace_back( - offsetSymbol.offset(), offsetSymbol.size()); + // This could be (and was) written to always call std::map<>::emplace(), + // which should handle duplicate entries gracefully, but 
it was still + // causing memory allocation & deallocation with gcc. + auto iter{inits_.find(&symbol)}; + if (iter == inits_.end()) { + iter = inits_.emplace(&symbol, symbol.size()).first; + } + auto &symbolInit{iter->second}; + symbolInit.NoteInitializedRange(offsetSymbol); return symbolInit.image; }}; const auto OutOfRangeError{[&]() { @@ -590,8 +595,7 @@ static void PopulateWithComponentDefaults(SymbolDataInitialization &init, } } if (initialized) { - init.initializedRanges.emplace_back( - componentOffset, component.size()); + init.NoteInitializedRange(componentOffset, component.size()); } } } else if (const auto *proc{component.detailsIf()}) { @@ -599,8 +603,7 @@ static void PopulateWithComponentDefaults(SymbolDataInitialization &init, SomeExpr procPtrInit{evaluate::ProcedureDesignator{**proc->init()}}; auto extant{init.image.AsConstantPointer(componentOffset)}; if (!extant || !(*extant == procPtrInit)) { - init.initializedRanges.emplace_back( - componentOffset, component.size()); + init.NoteInitializedRange(componentOffset, component.size()); init.image.AddPointer(componentOffset, std::move(procPtrInit)); } } @@ -651,7 +654,7 @@ static void IncorporateExplicitInitialization( if (iter != inits.end()) { // DATA statement initialization for (const auto &range : iter->second.initializedRanges) { auto at{offset + range.start()}; - combined.initializedRanges.emplace_back(at, range.size()); + combined.NoteInitializedRange(at, range.size()); combined.image.Incorporate( at, iter->second.image, range.start(), range.size()); } @@ -663,7 +666,7 @@ static void IncorporateExplicitInitialization( if (IsPointer(mutableSymbol)) { if (auto *object{mutableSymbol.detailsIf()}) { if (object->init()) { - combined.initializedRanges.emplace_back(offset, mutableSymbol.size()); + combined.NoteInitializedRange(offset, mutableSymbol.size()); combined.image.AddPointer(offset, *object->init()); if (removeOriginalInits) { object->init().reset(); @@ -671,7 +674,7 @@ static void 
IncorporateExplicitInitialization( } } else if (auto *proc{mutableSymbol.detailsIf()}) { if (proc->init() && *proc->init()) { - combined.initializedRanges.emplace_back(offset, mutableSymbol.size()); + combined.NoteInitializedRange(offset, mutableSymbol.size()); combined.image.AddPointer( offset, SomeExpr{evaluate::ProcedureDesignator{**proc->init()}}); if (removeOriginalInits) { @@ -681,7 +684,7 @@ static void IncorporateExplicitInitialization( } } else if (auto *object{mutableSymbol.detailsIf()}) { if (!IsNamedConstant(mutableSymbol) && object->init()) { - combined.initializedRanges.emplace_back(offset, mutableSymbol.size()); + combined.NoteInitializedRange(offset, mutableSymbol.size()); combined.image.Add( offset, mutableSymbol.size(), *object->init(), foldingContext); if (removeOriginalInits) { diff --git a/flang/lib/Semantics/data-to-inits.h b/flang/lib/Semantics/data-to-inits.h index 10d850d23d5d6..d8cc4601de26f 100644 --- a/flang/lib/Semantics/data-to-inits.h +++ b/flang/lib/Semantics/data-to-inits.h @@ -11,6 +11,7 @@ #include "flang/Common/default-kinds.h" #include "flang/Common/interval.h" +#include "flang/Evaluate/fold-designator.h" #include "flang/Evaluate/initial-image.h" #include #include @@ -30,6 +31,21 @@ struct SymbolDataInitialization { using Range = common::Interval; explicit SymbolDataInitialization(std::size_t bytes) : image{bytes} {} SymbolDataInitialization(SymbolDataInitialization &&) = default; + + void NoteInitializedRange(Range range) { + if (initializedRanges.empty() || + !initializedRanges.back().AnnexIfPredecessor(range)) { + initializedRanges.emplace_back(range); + } + } + void NoteInitializedRange( + common::ConstantSubscript offset, std::size_t size) { + NoteInitializedRange(Range{offset, size}); + } + void NoteInitializedRange(evaluate::OffsetSymbol offsetSymbol) { + NoteInitializedRange(offsetSymbol.offset(), offsetSymbol.size()); + } + evaluate::InitialImage image; std::list initializedRanges; }; From 
11d07d9ef618497b825badee8b4f06a48575606b Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 17:08:28 -0700 Subject: [PATCH 289/720] [flang] Handle separate module procedures with INTERFACE dummy arguments (#67608) The code that duplicates the interface of a separate module procedure into its definition doesn't allow for a dummy procedure with an explicit INTERFACE declaration. Extend the code to handle this case. Fixes https://github.com/llvm/llvm-project/issues/66631. --- flang/lib/Semantics/resolve-names-utils.cpp | 56 +++++++++++++++------ flang/test/Semantics/separate-mp05.f90 | 40 +++++++++++++++ 2 files changed, 80 insertions(+), 16 deletions(-) create mode 100644 flang/test/Semantics/separate-mp05.f90 diff --git a/flang/lib/Semantics/resolve-names-utils.cpp b/flang/lib/Semantics/resolve-names-utils.cpp index ebc7aab3744d5..b901080e2860c 100644 --- a/flang/lib/Semantics/resolve-names-utils.cpp +++ b/flang/lib/Semantics/resolve-names-utils.cpp @@ -779,6 +779,7 @@ class SymbolMapper : public evaluate::AnyTraverse { return false; } void MapSymbolExprs(Symbol &); + Symbol *CopySymbol(const Symbol *); private: void MapParamValue(ParamValue ¶m) const { (*this)(param.GetExplicit()); } @@ -797,16 +798,44 @@ class SymbolMapper : public evaluate::AnyTraverse { SymbolAndTypeMappings &map_; }; -void SymbolMapper::MapSymbolExprs(Symbol &symbol) { - if (auto *object{symbol.detailsIf()}) { - if (const DeclTypeSpec *type{object->type()}) { - if (const DeclTypeSpec *newType{MapType(*type)}) { - object->ReplaceType(*newType); +Symbol *SymbolMapper::CopySymbol(const Symbol *symbol) { + if (symbol) { + if (auto *subp{symbol->detailsIf()}) { + if (subp->isInterface()) { + if (auto pair{scope_.try_emplace(symbol->name(), symbol->attrs())}; + pair.second) { + Symbol ©{*pair.first->second}; + map_.symbolMap[symbol] = © + copy.set(symbol->test(Symbol::Flag::Subroutine) + ? 
Symbol::Flag::Subroutine + : Symbol::Flag::Function); + Scope &newScope{scope_.MakeScope(Scope::Kind::Subprogram, ©)}; + copy.set_scope(&newScope); + copy.set_details(SubprogramDetails{}); + auto &newSubp{copy.get()}; + newSubp.set_isInterface(true); + newSubp.set_isDummy(subp->isDummy()); + newSubp.set_defaultIgnoreTKR(subp->defaultIgnoreTKR()); + MapSubprogramToNewSymbols(*symbol, copy, newScope, &map_); + return © + } } + } else if (Symbol * copy{scope_.CopySymbol(*symbol)}) { + map_.symbolMap[symbol] = copy; + return copy; } } + return nullptr; +} + +void SymbolMapper::MapSymbolExprs(Symbol &symbol) { common::visit( common::visitors{[&](ObjectEntityDetails &object) { + if (const DeclTypeSpec * type{object.type()}) { + if (const DeclTypeSpec * newType{MapType(*type)}) { + object.ReplaceType(*newType); + } + } for (ShapeSpec &spec : object.shape()) { MapShapeSpec(spec); } @@ -892,13 +921,7 @@ const Symbol *SymbolMapper::MapInterface(const Symbol *interface) { return interface; } else if (const auto *subp{interface->detailsIf()}; subp && subp->isInterface()) { - if (Symbol *newSymbol{scope_.CopySymbol(*interface)}) { - newSymbol->get().set_isInterface(true); - map_.symbolMap[interface] = newSymbol; - Scope &newScope{scope_.MakeScope(Scope::Kind::Subprogram, newSymbol)}; - MapSubprogramToNewSymbols(*interface, *newSymbol, newScope, &map_); - return newSymbol; - } + return CopySymbol(interface); } } return nullptr; @@ -913,10 +936,11 @@ void MapSubprogramToNewSymbols(const Symbol &oldSymbol, Symbol &newSymbol, mappings->symbolMap[&oldSymbol] = &newSymbol; const auto &oldDetails{oldSymbol.get()}; auto &newDetails{newSymbol.get()}; + SymbolMapper mapper{newScope, *mappings}; for (const Symbol *dummyArg : oldDetails.dummyArgs()) { if (!dummyArg) { newDetails.add_alternateReturn(); - } else if (Symbol *copy{newScope.CopySymbol(*dummyArg)}) { + } else if (Symbol * copy{mapper.CopySymbol(dummyArg)}) { copy->set(Symbol::Flag::Implicit, false); 
newDetails.add_dummyArg(*copy); mappings->symbolMap[dummyArg] = copy; @@ -924,12 +948,12 @@ void MapSubprogramToNewSymbols(const Symbol &oldSymbol, Symbol &newSymbol, } if (oldDetails.isFunction()) { newScope.erase(newSymbol.name()); - if (Symbol *copy{newScope.CopySymbol(oldDetails.result())}) { + const Symbol &result{oldDetails.result()}; + if (Symbol * copy{mapper.CopySymbol(&result)}) { newDetails.set_result(*copy); - mappings->symbolMap[&oldDetails.result()] = copy; + mappings->symbolMap[&result] = copy; } } - SymbolMapper mapper{newScope, *mappings}; for (auto &[_, ref] : newScope) { mapper.MapSymbolExprs(*ref); } diff --git a/flang/test/Semantics/separate-mp05.f90 b/flang/test/Semantics/separate-mp05.f90 new file mode 100644 index 0000000000000..5b7e2523a2286 --- /dev/null +++ b/flang/test/Semantics/separate-mp05.f90 @@ -0,0 +1,40 @@ +! RUN: %python %S/test_symbols.py %s %flang_fc1 +! Ensure that SMPs work with dummy procedures declared as interfaces +!DEF: /m Module +module m + implicit none + interface + !DEF: /m/smp MODULE, PUBLIC, PURE (Function) Subprogram REAL(4) + !DEF: /m/smp/f EXTERNAL, PURE (Function) Subprogram REAL(4) + !DEF: /m/smp/x INTENT(IN) ObjectEntity REAL(4) + !DEF: /m/smp/res (Implicit) ObjectEntity REAL(4) + pure module function smp(f, x) result(res) + interface + !REF: /m/smp/f + !DEF: /m/smp/f/x INTENT(IN) ObjectEntity REAL(4) + !DEF: /m/smp/f/r ObjectEntity REAL(4) + pure function f(x) result(r) + !REF: /m/smp/f/x + real, intent(in) :: x + !REF: /m/smp/f/r + real r + end function + end interface + !REF: /m/smp/x + real, intent(in) :: x + end function + end interface +end module +!REF: /m +!DEF: /m/sm Module +submodule (m)sm + implicit none +contains + !DEF: /m/sm/smp MODULE, PUBLIC, PURE (Function) Subprogram REAL(4) + module procedure smp + !DEF: /m/sm/smp/res (Implicit) ObjectEntity REAL(4) + !DEF: /m/sm/smp/f EXTERNAL, PURE (Function) Subprogram REAL(4) + !DEF: /m/sm/smp/x INTENT(IN) ObjectEntity REAL(4) + res = f(x) + end 
procedure +end submodule From 81d04709f86968431ecab1df12a17279d057daa9 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 16 Oct 2023 17:29:25 -0700 Subject: [PATCH 290/720] [flang] Fix construct names on labeled DO (#67622) Fortran requires that a DO construct with a construct name end with an END DO statement bearing the same name. This is true even if the DO construct begins with a label DO statement; e.g., "constrName: do 10 j=1,10" must end with "10 end do constrName". The compiler presently basically ignores construct names that appear on label DO statements, because only non-label DO statements can be parsed as DO constructs. This causes us to miss some errors, and (worse) breaks the usage of the construct name on CYCLE and EXIT statements. To fix this, this patch changes the parse tree and parser so that a DO construct name on a putative label DO statement causes it to be parsed as a "non-label" DO statement... with a label. Only true old-style labeled DO statements without construct names are now parsed as such. I did not change the class name NonLabelDoStmt -- it's widely used across the front-end, and is the name of a production in the standard's grammar. But now it basically means DoConstructDoStmt. Fixes https://github.com/llvm/llvm-project/issues/67283. 
--- flang/include/flang/Parser/parse-tree.h | 7 ++-- flang/lib/Parser/executable-parsers.cpp | 10 ++++-- flang/lib/Parser/unparse.cpp | 5 +-- flang/lib/Semantics/canonicalize-do.cpp | 9 +++--- flang/lib/Semantics/resolve-labels.cpp | 43 ++++++++++++++++++++++--- flang/test/Semantics/dosemantics13.f90 | 29 +++++++++++++++++ flang/test/Semantics/dosemantics14.f90 | 12 +++++++ 7 files changed, 99 insertions(+), 16 deletions(-) create mode 100644 flang/test/Semantics/dosemantics13.f90 create mode 100644 flang/test/Semantics/dosemantics14.f90 diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index cb4bb59bf312c..408a474cfa8a5 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -2259,15 +2259,18 @@ struct LoopControl { }; // R1121 label-do-stmt -> [do-construct-name :] DO label [loop-control] +// A label-do-stmt with a do-construct-name is parsed as a non-label-do-stmt. struct LabelDoStmt { TUPLE_CLASS_BOILERPLATE(LabelDoStmt); - std::tuple, Label, std::optional> t; + std::tuple> t; }; // R1122 nonlabel-do-stmt -> [do-construct-name :] DO [loop-control] struct NonLabelDoStmt { TUPLE_CLASS_BOILERPLATE(NonLabelDoStmt); - std::tuple, std::optional> t; + std::tuple, std::optional

C23 implementation status

N2672 Yes + + Towards Integer Safety + N2683 + Clang 18 + Adding Fundamental Type for N-bit Integers From b0eba8e209d46fbd18aa1fec126ee4454e9b93ff Mon Sep 17 00:00:00 2001 From: Jianjian Guan Date: Tue, 17 Oct 2023 10:10:19 +0800 Subject: [PATCH 292/720] [RISCV] Support STRICT_FP_ROUND and STRICT_FP_EXTEND when only have Zvfhmin (#68559) This patch supports STRICT_FP_ROUND and STRICT_FP_EXTEND when we only have Zvfhmin but no Zvfh. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++++ .../RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll | 4 ++++ llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4dc3f6137e306..666998fecd6e1 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -942,6 +942,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (!isTypeLegal(VT)) continue; setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); + setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, + Custom); setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT, Custom); @@ -1154,6 +1156,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (VT.getVectorElementType() == MVT::f16 && !Subtarget.hasVInstructionsF16()) { setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); + setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, + Custom); setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); setOperationAction( {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll index 32a050800b979..fd53113741de0 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll @@ -3,6 +3,10 @@ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) define <2 x float> @vfptrunc_v2f64_v2f32(<2 x double> %va) strictfp { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll index 72bf2b94e6f9f..4404a275858f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll @@ -3,6 +3,10 @@ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.experimental.constrained.fptrunc.nxv1f32.nxv1f64(, metadata, metadata) define @vfptrunc_nxv1f64_nxv1f32( %va) strictfp { From ce9eaf0360d9f528ab061bcdbcf81c5b2155f098 Mon Sep 17 00:00:00 2001 From: Antonio Abbatangelo Date: Mon, 16 Oct 2023 22:10:58 -0400 Subject: [PATCH 293/720] Revert "[clang][Sema] Use original template pattern when declaring implicit deduction guides for nested template classes (#68379)" This reverts commit 
dd0fba11690f9fef304d5f48cde646e5eca8d3c0. It fails on nested classes that have both an explicit deduction guide and a constructor that has an argument of the same type as the class (i.e. a copy constructor). --- clang/docs/ReleaseNotes.rst | 5 ----- clang/lib/Sema/SemaTemplate.cpp | 22 +------------------ .../nested-implicit-deduction-guides.cpp | 12 ---------- 3 files changed, 1 insertion(+), 38 deletions(-) delete mode 100644 clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3f83cd71e64cb..99525b00239a4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -516,11 +516,6 @@ Bug Fixes to C++ Support rather than prefer the non-templated constructor as specified in [standard.group]p3. -- Fix a bug where implicit deduction guides are not correctly generated for nested template - classes. Fixes: - (`#46200 `_) - (`#57812 `_) - Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ - Fixed an import failure of recursive friend class template. 
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index fba5b22139170..ff370dd1e080b 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2250,7 +2250,6 @@ struct ConvertConstructorToDeductionGuideTransform { Sema &SemaRef; ClassTemplateDecl *Template; - ClassTemplateDecl *NestedPattern = nullptr; DeclContext *DC = Template->getDeclContext(); CXXRecordDecl *Primary = Template->getTemplatedDecl(); @@ -2328,8 +2327,6 @@ struct ConvertConstructorToDeductionGuideTransform { if (FTD) { Args.addOuterTemplateArguments(SubstArgs); Args.addOuterRetainedLevel(); - if (NestedPattern) - Args.addOuterRetainedLevels(NestedPattern->getTemplateDepth()); } FunctionProtoTypeLoc FPTL = CD->getTypeSourceInfo()->getTypeLoc() @@ -2441,17 +2438,10 @@ struct ConvertConstructorToDeductionGuideTransform { SmallVector ParamTypes; const FunctionProtoType *T = TL.getTypePtr(); - MultiLevelTemplateArgumentList OuterInstantiationArgs; - if (NestedPattern) - OuterInstantiationArgs = SemaRef.getTemplateInstantiationArgs(Template); - // -- The types of the function parameters are those of the constructor. for (auto *OldParam : TL.getParams()) { ParmVarDecl *NewParam = transformFunctionTypeParam(OldParam, Args, MaterializedTypedefs); - if (NestedPattern && NewParam) - NewParam = transformFunctionTypeParam(NewParam, OuterInstantiationArgs, - MaterializedTypedefs); if (!NewParam) return QualType(); ParamTypes.push_back(NewParam->getType()); @@ -2657,23 +2647,13 @@ void Sema::DeclareImplicitDeductionGuides(TemplateDecl *Template, if (BuildingDeductionGuides.isInvalid()) return; - // If the template is nested, then we need to use the original - // pattern to iterate over the constructors. 
- ClassTemplateDecl *Pattern = Transform.Template; - while (Pattern->getInstantiatedFromMemberTemplate()) { - if (Pattern->isMemberSpecialization()) - break; - Pattern = Pattern->getInstantiatedFromMemberTemplate(); - Transform.NestedPattern = Pattern; - } - // Convert declared constructors into deduction guide templates. // FIXME: Skip constructors for which deduction must necessarily fail (those // for which some class template parameter without a default argument never // appears in a deduced context). llvm::SmallPtrSet ProcessedCtors; bool AddedAny = false; - for (NamedDecl *D : LookupConstructors(Pattern->getTemplatedDecl())) { + for (NamedDecl *D : LookupConstructors(Transform.Primary)) { D = D->getUnderlyingDecl(); if (D->isInvalidDecl() || D->isImplicit()) continue; diff --git a/clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp b/clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp deleted file mode 100644 index 4915c687cf4c4..0000000000000 --- a/clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %clang_cc1 -std=c++17 -verify %s -// expected-no-diagnostics - -template struct S { - template struct N { - N(T) {} - N(T, U) {} - template N(V, U) {} - }; -}; - -S::N x{"a", 1}; From 5a6ef95a1cb5c9b537b288361b70d00043750995 Mon Sep 17 00:00:00 2001 From: Shao-Ce SUN Date: Tue, 17 Oct 2023 10:36:24 +0800 Subject: [PATCH 294/720] [RISCV][GISel] Add legalizer for G_UMAX, G_UMIN, G_SMAX, G_SMIN (#69150) Similar to #67577, Lower G_UMAX, G_UMIN, G_SMAX, G_SMIN. 
--- .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 1 + .../legalizer/rv32/legalize-smax.mir | 114 ++++++++++++++++++ .../legalizer/rv32/legalize-smin.mir | 114 ++++++++++++++++++ .../legalizer/rv32/legalize-umax.mir | 112 +++++++++++++++++ .../legalizer/rv32/legalize-umin.mir | 112 +++++++++++++++++ .../legalizer/rv64/legalize-smax.mir | 110 +++++++++++++++++ .../legalizer/rv64/legalize-smin.mir | 110 +++++++++++++++++ .../legalizer/rv64/legalize-umax.mir | 109 +++++++++++++++++ .../legalizer/rv64/legalize-umin.mir | 109 +++++++++++++++++ 9 files changed, 891 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smax.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smin.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umax.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umin.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smax.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smin.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umax.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umin.mir diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 3ec3359884883..475d8d5e3c6c7 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -186,6 +186,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) { } getActionDefinitionsBuilder(G_ABS).lower(); + getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN}).lower(); getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smax.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smax.mir new file mode 100644 index 
0000000000000..31df394c4f754 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smax.mir @@ -0,0 +1,114 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: umax_i8 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C3]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C3]](s32) + ; CHECK-NEXT: $x10 = COPY [[ASHR]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s8) = G_TRUNC %0(s32) + %3:_(s8) = G_TRUNC %1(s32) + %4:_(s8) = G_UMAX %2, %3 + %5:_(s32) = G_SEXT %4(s8) + $x10 = COPY %5(s32) + PseudoRET implicit $x10 +... 
+ +--- +name: umax_i16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C3]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C3]](s32) + ; CHECK-NEXT: $x10 = COPY [[ASHR]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s16) = G_TRUNC %0(s32) + %3:_(s16) = G_TRUNC %1(s32) + %4:_(s16) = G_UMAX %2, %3 + %5:_(s32) = G_SEXT %4(s16) + $x10 = COPY %5(s32) + PseudoRET implicit $x10 +... + +--- +name: umax_i32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s32) = G_UMAX %0, %1 + $x10 = COPY %2(s32) + PseudoRET implicit $x10 +... 
+ +--- +name: umax_i64 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[COPY2]] + ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s32) = COPY $x12 + %3:_(s32) = COPY $x13 + %4:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %6:_(s64) = G_UMAX %4, %5 + %7:_(s32) = G_TRUNC %6(s64) + $x10 = COPY %7(s32) + PseudoRET implicit $x10 +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smin.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smin.mir new file mode 100644 index 0000000000000..2b589e6bb63c1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smin.mir @@ -0,0 +1,114 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: umin_i8 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C3]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C3]](s32) + ; CHECK-NEXT: $x10 = COPY [[ASHR]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s8) = G_TRUNC %0(s32) + %3:_(s8) = G_TRUNC %1(s32) + %4:_(s8) = G_UMIN %2, %3 + %5:_(s32) = G_SEXT %4(s8) + $x10 = COPY %5(s32) + PseudoRET implicit $x10 +... 
+ +--- +name: umin_i16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C3]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C3]](s32) + ; CHECK-NEXT: $x10 = COPY [[ASHR]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s16) = G_TRUNC %0(s32) + %3:_(s16) = G_TRUNC %1(s32) + %4:_(s16) = G_UMIN %2, %3 + %5:_(s32) = G_SEXT %4(s16) + $x10 = COPY %5(s32) + PseudoRET implicit $x10 +... + +--- +name: umin_i32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s32) = G_UMIN %0, %1 + $x10 = COPY %2(s32) + PseudoRET implicit $x10 +... 
+ +--- +name: umin_i64 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[COPY2]] + ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s32) = COPY $x12 + %3:_(s32) = COPY $x13 + %4:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %6:_(s64) = G_UMIN %4, %5 + %7:_(s32) = G_TRUNC %6(s64) + $x10 = COPY %7(s32) + PseudoRET implicit $x10 +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umax.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umax.mir new file mode 100644 index 0000000000000..8dea2cb875073 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umax.mir @@ -0,0 +1,112 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: umax_i8 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] + ; CHECK-NEXT: $x10 = COPY [[AND3]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s8) = G_TRUNC %0(s32) + %3:_(s8) = G_TRUNC %1(s32) + %4:_(s8) = G_UMAX %2, %3 + %5:_(s32) = G_ZEXT %4(s8) + $x10 = COPY %5(s32) + PseudoRET implicit $x10 +... 
+ +--- +name: umax_i16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] + ; CHECK-NEXT: $x10 = COPY [[AND3]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s16) = G_TRUNC %0(s32) + %3:_(s16) = G_TRUNC %1(s32) + %4:_(s16) = G_UMAX %2, %3 + %5:_(s32) = G_ZEXT %4(s16) + $x10 = COPY %5(s32) + PseudoRET implicit $x10 +... + +--- +name: umax_i32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s32) = G_UMAX %0, %1 + $x10 = COPY %2(s32) + PseudoRET implicit $x10 +... 
+ +--- +name: umax_i64 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[COPY2]] + ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s32) = COPY $x12 + %3:_(s32) = COPY $x13 + %4:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %6:_(s64) = G_UMAX %4, %5 + %7:_(s32) = G_TRUNC %6(s64) + $x10 = COPY %7(s32) + PseudoRET implicit $x10 +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umin.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umin.mir new file mode 100644 index 0000000000000..cd180a2a5b329 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umin.mir @@ -0,0 +1,112 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: umin_i8 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] + ; CHECK-NEXT: $x10 = COPY [[AND3]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s8) = G_TRUNC %0(s32) + %3:_(s8) = G_TRUNC %1(s32) + %4:_(s8) = G_UMIN %2, %3 + %5:_(s32) = G_ZEXT %4(s8) + $x10 = COPY %5(s32) + PseudoRET implicit $x10 +... 
+ +--- +name: umin_i16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] + ; CHECK-NEXT: $x10 = COPY [[AND3]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s16) = G_TRUNC %0(s32) + %3:_(s16) = G_TRUNC %1(s32) + %4:_(s16) = G_UMIN %2, %3 + %5:_(s32) = G_ZEXT %4(s16) + $x10 = COPY %5(s32) + PseudoRET implicit $x10 +... + +--- +name: umin_i32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s32) = G_UMIN %0, %1 + $x10 = COPY %2(s32) + PseudoRET implicit $x10 +... 
+ +--- +name: umin_i64 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[COPY2]] + ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = COPY $x11 + %2:_(s32) = COPY $x12 + %3:_(s32) = COPY $x13 + %4:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %6:_(s64) = G_UMIN %4, %5 + %7:_(s32) = G_TRUNC %6(s64) + $x10 = COPY %7(s32) + PseudoRET implicit $x10 +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smax.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smax.mir new file mode 100644 index 0000000000000..43f4309dc5670 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smax.mir @@ -0,0 +1,110 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv64 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: umax_i8 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i8 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C3]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C3]](s64) + ; CHECK-NEXT: $x10 = COPY [[ASHR]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s8) = G_TRUNC %0(s64) + %3:_(s8) = G_TRUNC %1(s64) + %4:_(s8) = G_UMAX %2, %3 + %5:_(s64) = G_SEXT %4(s8) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... 
+ +--- +name: umax_i16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i16 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C3]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C3]](s64) + ; CHECK-NEXT: $x10 = COPY [[ASHR]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s16) = G_TRUNC %0(s64) + %3:_(s16) = G_TRUNC %1(s64) + %4:_(s16) = G_UMAX %2, %3 + %5:_(s64) = G_SEXT %4(s16) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... 
+ +--- +name: umax_i32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SELECT]], 32 + ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s32) = G_TRUNC %0(s64) + %3:_(s32) = G_TRUNC %1(s64) + %4:_(s32) = G_UMAX %2, %3 + %5:_(s64) = G_SEXT %4(s32) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... + +--- +name: umax_i64 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s64) = G_UMAX %0, %1 + $x10 = COPY %2(s64) + PseudoRET implicit $x10 +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smin.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smin.mir new file mode 100644 index 0000000000000..85fea46b4bc46 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smin.mir @@ -0,0 +1,110 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv64 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: umin_i8 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i8 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C3]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C3]](s64) + ; CHECK-NEXT: $x10 = COPY [[ASHR]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s8) = G_TRUNC %0(s64) + %3:_(s8) = G_TRUNC %1(s64) + %4:_(s8) = G_UMIN %2, %3 + %5:_(s64) = G_SEXT %4(s8) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... 
+ +--- +name: umin_i16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i16 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C3]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C3]](s64) + ; CHECK-NEXT: $x10 = COPY [[ASHR]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s16) = G_TRUNC %0(s64) + %3:_(s16) = G_TRUNC %1(s64) + %4:_(s16) = G_UMIN %2, %3 + %5:_(s64) = G_SEXT %4(s16) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... 
+ +--- +name: umin_i32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SELECT]], 32 + ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s32) = G_TRUNC %0(s64) + %3:_(s32) = G_TRUNC %1(s64) + %4:_(s32) = G_UMIN %2, %3 + %5:_(s64) = G_SEXT %4(s32) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... + +--- +name: umin_i64 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s64) = G_UMIN %0, %1 + $x10 = COPY %2(s64) + PseudoRET implicit $x10 +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umax.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umax.mir new file mode 100644 index 0000000000000..d0310e3e21ec9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umax.mir @@ -0,0 +1,109 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv64 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: umax_i8 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i8 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] + ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s8) = G_TRUNC %0(s64) + %3:_(s8) = G_TRUNC %1(s64) + %4:_(s8) = G_UMAX %2, %3 + %5:_(s64) = G_ZEXT %4(s8) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... 
+ +--- +name: umax_i16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i16 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] + ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s16) = G_TRUNC %0(s64) + %3:_(s16) = G_TRUNC %1(s64) + %4:_(s16) = G_UMAX %2, %3 + %5:_(s64) = G_ZEXT %4(s16) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... 
+ +--- +name: umax_i32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] + ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s32) = G_TRUNC %0(s64) + %3:_(s32) = G_TRUNC %1(s64) + %4:_(s32) = G_UMAX %2, %3 + %5:_(s64) = G_ZEXT %4(s32) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... + +--- +name: umax_i64 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umax_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s64) = G_UMAX %0, %1 + $x10 = COPY %2(s64) + PseudoRET implicit $x10 +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umin.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umin.mir new file mode 100644 index 0000000000000..a0eec3298a586 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umin.mir @@ -0,0 +1,109 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv64 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: umin_i8 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i8 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] + ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s8) = G_TRUNC %0(s64) + %3:_(s8) = G_TRUNC %1(s64) + %4:_(s8) = G_UMIN %2, %3 + %5:_(s64) = G_ZEXT %4(s8) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... 
+ +--- +name: umin_i16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i16 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] + ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s16) = G_TRUNC %0(s64) + %3:_(s16) = G_TRUNC %1(s64) + %4:_(s16) = G_UMIN %2, %3 + %5:_(s64) = G_ZEXT %4(s16) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... 
+ +--- +name: umin_i32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] + ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s32) = G_TRUNC %0(s64) + %3:_(s32) = G_TRUNC %1(s64) + %4:_(s32) = G_UMIN %2, %3 + %5:_(s64) = G_ZEXT %4(s32) + $x10 = COPY %5(s64) + PseudoRET implicit $x10 +... + +--- +name: umin_i64 +body: | + bb.0.entry: + ; CHECK-LABEL: name: umin_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = COPY $x11 + %2:_(s64) = G_UMIN %0, %1 + $x10 = COPY %2(s64) + PseudoRET implicit $x10 +... 
From cc6a5ea6e33d3febafc4334617230c528a0c4fa7 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Mon, 16 Oct 2023 20:44:53 -0700 Subject: [PATCH 295/720] [M68k][NFC] Fix some unused variable warnings Induced by variables that are only used in assertion statements. NFC. --- llvm/lib/Target/M68k/M68kInstrInfo.cpp | 4 ++++ llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp | 7 +++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp index 8d36e94d8e696..d56fef9e9029a 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp +++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp @@ -361,6 +361,7 @@ bool M68kInstrInfo::ExpandMOVX_RR(MachineInstrBuilder &MIB, MVT MVTDst, assert(RCDst && RCSrc && "Wrong use of MOVX_RR"); assert(RCDst != RCSrc && "You cannot use the same Reg Classes with MOVX_RR"); + (void)RCSrc; // We need to find the super source register that matches the size of Dst unsigned SSrc = RI.getMatchingMegaReg(Src, RCDst); @@ -407,6 +408,7 @@ bool M68kInstrInfo::ExpandMOVSZX_RR(MachineInstrBuilder &MIB, bool IsSigned, assert(RCDst && RCSrc && "Wrong use of MOVSX_RR"); assert(RCDst != RCSrc && "You cannot use the same Reg Classes with MOVSX_RR"); + (void)RCSrc; // We need to find the super source register that matches the size of Dst unsigned SSrc = RI.getMatchingMegaReg(Src, RCDst); @@ -746,6 +748,7 @@ void M68kInstrInfo::storeRegToStackSlot( const MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); assert(MFI.getObjectSize(FrameIndex) >= TRI->getSpillSize(*RC) && "Stack slot is too small to store"); + (void)MFI; unsigned Opc = getStoreRegOpcode(SrcReg, RC, TRI, Subtarget); DebugLoc DL = MBB.findDebugLoc(MI); @@ -763,6 +766,7 @@ void M68kInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); assert(MFI.getObjectSize(FrameIndex) >= TRI->getSpillSize(*RC) && "Stack slot is too small to load"); + (void)MFI; unsigned 
Opc = getLoadRegOpcode(DstReg, RC, TRI, Subtarget); DebugLoc DL = MBB.findDebugLoc(MI); diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp index 16460f0a105b8..32a5bb1dc6706 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp @@ -203,10 +203,9 @@ void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI, SmallVectorImpl &CB, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { - unsigned Opcode = MI.getOpcode(); - - LLVM_DEBUG(dbgs() << "EncodeInstruction: " << MCII.getName(Opcode) << "(" - << Opcode << ")\n"); + LLVM_DEBUG(dbgs() << "EncodeInstruction: " << MCII.getName(MI.getOpcode()) + << "(" << MI.getOpcode() << ")\n"); + (void)MCII; // Try using the new method first. APInt EncodedInst(16, 0U); From 7bc793a6925ccebbe21f1c98a79d6dc89a615c01 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 17 Oct 2023 06:53:33 +0200 Subject: [PATCH 296/720] [clang][Interp] Check pointer inc/dec ops for null (#69168) --- clang/lib/AST/Interp/Interp.h | 7 +++++-- clang/test/AST/Interp/arrays.cpp | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index e3e6a4cec63b1..3d226a40f9cf6 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -1488,11 +1488,14 @@ static inline bool IncDecPtrHelper(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { using OneT = Integral<8, false>; + const Pointer &P = Ptr.deref(); + if (!CheckNull(S, OpPC, P, CSK_ArrayIndex)) + return false; + // Get the current value on the stack. - S.Stk.push(Ptr.deref()); + S.Stk.push(P); // Now the current Ptr again and a constant 1.
- Pointer P = Ptr.deref(); OneT One = OneT::from(1); if (!OffsetHelper(S, OpPC, One, P)) return false; diff --git a/clang/test/AST/Interp/arrays.cpp b/clang/test/AST/Interp/arrays.cpp index 281835f828bbd..18c4ae4354f54 100644 --- a/clang/test/AST/Interp/arrays.cpp +++ b/clang/test/AST/Interp/arrays.cpp @@ -333,6 +333,26 @@ namespace IncDec { // expected-note {{in call to}} \ // ref-error {{not an integral constant expression}} \ // ref-note {{in call to}} + + constexpr int nullptr1(bool Pre) { + int *a = nullptr; + if (Pre) + ++a; // ref-note {{arithmetic on null pointer}} \ + // expected-note {{arithmetic on null pointer}} + else + a++; // ref-note {{arithmetic on null pointer}} \ + // expected-note {{arithmetic on null pointer}} + return 1; + } + static_assert(nullptr1(true) == 1, ""); // ref-error {{not an integral constant expression}} \ + // ref-note {{in call to}} \ + // expected-error {{not an integral constant expression}} \ + // expected-note {{in call to}} + + static_assert(nullptr1(false) == 1, ""); // ref-error {{not an integral constant expression}} \ + // ref-note {{in call to}} \ + // expected-error {{not an integral constant expression}} \ + // expected-note {{in call to}} }; namespace ZeroInit { From 12a731b5a4cfec96ba7c72888a1d76b8e13b043e Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 16 Oct 2023 22:08:45 -0700 Subject: [PATCH 297/720] [CI] Add Github actions job to build LLVM documentation (#69269) This patch adds in support for building the LLVM documentation through a Github actions job. This enables catching documentation build failures earlier and also more easily as the job failure will show up directly on pull requests. The job currently only builds the documentation for LLVM, but the plan is to extend it to also build the documentation for other subprojects when appropriate (i.e., the docs files have changed), starting with clang. 
--- .github/workflows/docs.yml | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000000000..4af4083a77b8e --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,46 @@ +# LLVM Documentation CI +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +name: "Test documentation build" + +permissions: + contents: read + +on: + push: + branches: + - 'main' + paths: + - 'llvm/docs/**' + pull_request: + paths: + - 'llvm/docs/**' + +jobs: + check-docs-build: + name: "Test documentation build" + runs-on: ubuntu-latest + steps: + - name: Fetch LLVM sources + uses: actions/checkout@v4 + with: + fetch-depth: 1 + - name: Setup Python env + uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' + cache-dependency-path: 'llvm/docs/requirements.txt' + - name: Install python dependencies + run: pip install -r llvm/docs/requirements.txt + - name: Install system dependencies + run: apt-get update && apt-get install -y cmake ninja-build + - name: Build docs + run: | + mkdir build + cd build + cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_SPHINX=ON -DSPHINX_OUTPUT_HTML=ON -DSPHINX_OUTPUT_MAN=ON ../llvm + TZ=UTC ninja docs-llvm-html docs-llvm-man + From 4b8b70a52fa4d133a19f620c8a9160793ded08b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E9=9B=A8=E5=9F=B9?= Date: Tue, 17 Oct 2023 13:23:28 +0800 Subject: [PATCH 298/720] [Clang] Fix dependence handling of nttp for variable templates (#69075) The dependence of a template argument is not only determined by the argument itself, but also by the type of the template parameter: > Furthermore, a non-type [template-argument](https://eel.is/c++draft/temp.names#nt:template-argument) is dependent 
if the corresponding non-type [template-parameter](https://eel.is/c++draft/temp.param#nt:template-parameter) is of reference or pointer type and the [template-argument](https://eel.is/c++draft/temp.names#nt:template-argument) designates or points to a member of the current instantiation or a member of a dependent type[.](https://eel.is/c++draft/temp.dep#temp-3.sentence-1) For example: ```cpp struct A{}; template const A JoinStringViews = T; template class Builder { public: static constexpr A Equal{}; static constexpr auto Val = JoinStringViews; }; ``` The constant expression `Equal` is not dependent, but because the type of the template parameter is a reference type and `Equal` is a member of the current instantiation, the template argument of `JoinStringViews` is actually dependent, which makes `JoinStringViews` dependent. When a template-id of a variable template is dependent, `CheckVarTemplateId` will return an `UnresolvedLookupExpr`, but `UnresolvedLookupExpr` calculates dependence by template arguments only (the `ConstantExpr` `Equal` here), which is not dependent. This causes type deduction to think that `JoinStringViews` is `OverloadTy` and treat it as a function template, which is clearly wrong. This PR adds a `KnownDependent` parameter to the constructor of `UnresolvedLookupExpr`. After canonicalization, if `CanonicalConverted` contains any dependent argument, `KnownDependent` is set to `true`. This fixes the dependence calculation of `UnresolvedLookupExpr` for dependent variable templates. Fixes #65153 . 
--- clang/docs/ReleaseNotes.rst | 4 ++++ clang/include/clang/AST/ExprCXX.h | 8 ++++++-- clang/lib/AST/ASTImporter.cpp | 5 ++++- clang/lib/AST/ExprCXX.cpp | 16 ++++++++-------- clang/lib/Sema/SemaDeclCXX.cpp | 5 +++-- clang/lib/Sema/SemaTemplate.cpp | 14 ++++++-------- clang/test/SemaTemplate/dependent-expr.cpp | 15 +++++++++++++++ 7 files changed, 46 insertions(+), 21 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 99525b00239a4..81cbfd90155fe 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -516,6 +516,10 @@ Bug Fixes to C++ Support rather than prefer the non-templated constructor as specified in [standard.group]p3. +- Fixed a crash caused by incorrect handling of dependence on variable templates + with non-type template parameters of reference type. Fixes: + (`#65153 `_) + Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ - Fixed an import failure of recursive friend class template. diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 17dbb5e888ebd..798c98cfcf2d4 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -3191,7 +3191,8 @@ class UnresolvedLookupExpr final const DeclarationNameInfo &NameInfo, bool RequiresADL, bool Overloaded, const TemplateArgumentListInfo *TemplateArgs, - UnresolvedSetIterator Begin, UnresolvedSetIterator End); + UnresolvedSetIterator Begin, UnresolvedSetIterator End, + bool KnownDependent); UnresolvedLookupExpr(EmptyShell Empty, unsigned NumResults, bool HasTemplateKWAndArgsInfo); @@ -3211,12 +3212,15 @@ class UnresolvedLookupExpr final const DeclarationNameInfo &NameInfo, bool RequiresADL, bool Overloaded, UnresolvedSetIterator Begin, UnresolvedSetIterator End); + // After canonicalization, there may be dependent template arguments in + // CanonicalConverted, but none of the Args is dependent. When any of + // CanonicalConverted is dependent, KnownDependent is true.
static UnresolvedLookupExpr * Create(const ASTContext &Context, CXXRecordDecl *NamingClass, NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, const TemplateArgumentListInfo *Args, UnresolvedSetIterator Begin, - UnresolvedSetIterator End); + UnresolvedSetIterator End, bool KnownDependent); static UnresolvedLookupExpr *CreateEmpty(const ASTContext &Context, unsigned NumResults, diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 628a2b2bbca39..650ff201e66b7 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -8395,10 +8395,13 @@ ASTNodeImporter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E) { if (!ToTemplateKeywordLocOrErr) return ToTemplateKeywordLocOrErr.takeError(); + const bool KnownDependent = + (E->getDependence() & ExprDependence::TypeValue) == + ExprDependence::TypeValue; return UnresolvedLookupExpr::Create( Importer.getToContext(), *ToNamingClassOrErr, *ToQualifierLocOrErr, *ToTemplateKeywordLocOrErr, ToNameInfo, E->requiresADL(), &ToTAInfo, - ToDecls.begin(), ToDecls.end()); + ToDecls.begin(), ToDecls.end(), KnownDependent); } return UnresolvedLookupExpr::Create( diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 06163255f9b5e..b9a004acc5ad0 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -354,10 +354,10 @@ UnresolvedLookupExpr::UnresolvedLookupExpr( NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, bool Overloaded, const TemplateArgumentListInfo *TemplateArgs, UnresolvedSetIterator Begin, - UnresolvedSetIterator End) + UnresolvedSetIterator End, bool KnownDependent) : OverloadExpr(UnresolvedLookupExprClass, Context, QualifierLoc, - TemplateKWLoc, NameInfo, TemplateArgs, Begin, End, false, - false, false), + TemplateKWLoc, NameInfo, TemplateArgs, Begin, End, + KnownDependent, false, false), 
NamingClass(NamingClass) { UnresolvedLookupExprBits.RequiresADL = RequiresADL; UnresolvedLookupExprBits.Overloaded = Overloaded; @@ -380,7 +380,7 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create( void *Mem = Context.Allocate(Size, alignof(UnresolvedLookupExpr)); return new (Mem) UnresolvedLookupExpr(Context, NamingClass, QualifierLoc, SourceLocation(), NameInfo, RequiresADL, - Overloaded, nullptr, Begin, End); + Overloaded, nullptr, Begin, End, false); } UnresolvedLookupExpr *UnresolvedLookupExpr::Create( @@ -388,7 +388,7 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create( NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, const TemplateArgumentListInfo *Args, UnresolvedSetIterator Begin, - UnresolvedSetIterator End) { + UnresolvedSetIterator End, bool KnownDependent) { assert(Args || TemplateKWLoc.isValid()); unsigned NumResults = End - Begin; unsigned NumTemplateArgs = Args ? Args->size() : 0; @@ -396,9 +396,9 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create( totalSizeToAlloc(NumResults, 1, NumTemplateArgs); void *Mem = Context.Allocate(Size, alignof(UnresolvedLookupExpr)); - return new (Mem) UnresolvedLookupExpr(Context, NamingClass, QualifierLoc, - TemplateKWLoc, NameInfo, RequiresADL, - /*Overloaded*/ true, Args, Begin, End); + return new (Mem) UnresolvedLookupExpr( + Context, NamingClass, QualifierLoc, TemplateKWLoc, NameInfo, RequiresADL, + /*Overloaded=*/true, Args, Begin, End, KnownDependent); } UnresolvedLookupExpr *UnresolvedLookupExpr::CreateEmpty( diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index f9c010b1a0024..0193e476b3a78 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1299,8 +1299,9 @@ static bool checkTupleLikeDecomposition(Sema &S, // in the associated namespaces. 
Expr *Get = UnresolvedLookupExpr::Create( S.Context, nullptr, NestedNameSpecifierLoc(), SourceLocation(), - DeclarationNameInfo(GetDN, Loc), /*RequiresADL*/true, &Args, - UnresolvedSetIterator(), UnresolvedSetIterator()); + DeclarationNameInfo(GetDN, Loc), /*RequiresADL*/ true, &Args, + UnresolvedSetIterator(), UnresolvedSetIterator(), + /*KnownDependent=*/false); Expr *Arg = E.get(); E = S.BuildCallExpr(nullptr, Get, Loc, Arg, Loc); diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index ff370dd1e080b..6389ec708bf34 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -4982,7 +4982,7 @@ ExprResult Sema::BuildTemplateIdExpr(const CXXScopeSpec &SS, return ExprError(); } } - + bool KnownDependent = false; // In C++1y, check variable template ids. if (R.getAsSingle()) { ExprResult Res = CheckVarTemplateId(SS, R.getLookupNameInfo(), @@ -4991,6 +4991,7 @@ ExprResult Sema::BuildTemplateIdExpr(const CXXScopeSpec &SS, if (Res.isInvalid() || Res.isUsable()) return Res; // Result is dependent. Carry on to build an UnresolvedLookupEpxr. + KnownDependent = true; } if (R.getAsSingle()) { @@ -5002,13 +5003,10 @@ ExprResult Sema::BuildTemplateIdExpr(const CXXScopeSpec &SS, // We don't want lookup warnings at this point. 
R.suppressDiagnostics(); - UnresolvedLookupExpr *ULE - = UnresolvedLookupExpr::Create(Context, R.getNamingClass(), - SS.getWithLocInContext(Context), - TemplateKWLoc, - R.getLookupNameInfo(), - RequiresADL, TemplateArgs, - R.begin(), R.end()); + UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create( + Context, R.getNamingClass(), SS.getWithLocInContext(Context), + TemplateKWLoc, R.getLookupNameInfo(), RequiresADL, TemplateArgs, + R.begin(), R.end(), KnownDependent); return ULE; } diff --git a/clang/test/SemaTemplate/dependent-expr.cpp b/clang/test/SemaTemplate/dependent-expr.cpp index 51bd375d7920e..ce210d9b74f6d 100644 --- a/clang/test/SemaTemplate/dependent-expr.cpp +++ b/clang/test/SemaTemplate/dependent-expr.cpp @@ -165,3 +165,18 @@ namespace BindingInStmtExpr { using U = decltype(num_bindings()); // expected-note {{previous}} using U = N<3>; // expected-error-re {{type alias redefinition with different types ('N<3>' vs {{.*}}N<2>}} } + +namespace PR65153 { +struct A{}; + +template +const A JoinStringViews = T; + +template +class Builder { +public: + static constexpr A Equal{}; + // no crash here + static constexpr auto Val = JoinStringViews; +}; +} // namespace PR65153 From aa4dfd3736dd1c2e0263eacd09bd613c5784ea73 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Mon, 16 Oct 2023 22:46:27 -0700 Subject: [PATCH 299/720] [hwasan] Fix and re-enable deep-recursion.c (#69265) deep-recursion.c was disabled (https://github.com/llvm/llvm-project/commit/c007e0f66ee3f96467fd12f6200218fb4c38c2c9) because the test may get unlucky and end up with a zero-tagged variable, leading to a false negative (https://github.com/llvm/llvm-project/issues/69221). This patch re-enables the test and adds a workaround: it checks if the variable is zero-tagged, and if so, it will instead use the neighboring variable, which must have a different (hence non-zero) tag. Fixing the stack allocation tagging is left as an exercise for the reader. 
It is non-trivial because, even if the stackTagBase is non-zero, tags for subsequent allocations in the stack frame may wrap around to zero; working around this would require adding multiple instructions to each alloca. --------- Co-authored-by: Thurston Dang --- .../test/hwasan/TestCases/deep-recursion.c | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/compiler-rt/test/hwasan/TestCases/deep-recursion.c b/compiler-rt/test/hwasan/TestCases/deep-recursion.c index bf390d051d472..792f758958270 100644 --- a/compiler-rt/test/hwasan/TestCases/deep-recursion.c +++ b/compiler-rt/test/hwasan/TestCases/deep-recursion.c @@ -17,9 +17,6 @@ // Stack histories are currently not recorded on x86. // XFAIL: target=x86_64{{.*}} -// Flaky on AArch64 Linux, see https://github.com/llvm/llvm-project/issues/69221. -// UNSUPPORTED: target=aarch64{{.*}} - #include // At least -O1 is needed for this function to not have a stack frame on // AArch64. @@ -29,7 +26,23 @@ void USE(void *x) { // pretend_to_do_something(void *x) volatile int four = 4; -__attribute__((noinline)) void OOB() { int x[4]; x[four] = 0; USE(&x[0]); } +__attribute__((noinline)) void OOB() { + int x[4]; + int y[4]; + + // Tags for stack-allocated variables can occasionally be zero, resulting in + // a false negative for this test. This is not easy to fix, hence we work + // around it: if the tag is zero, we use the neighboring variable instead, + // which must have a different (hence non-zero) tag. + // This tag check assumes aarch64. 
+ if (((uintptr_t)&x) >> 56 == 0) { + y[four] = 0; + } else { + x[four] = 0; + } + USE(&x[0]); + USE(&y[0]); +} __attribute__((noinline)) void FUNC1() { int x; USE(&x); OOB(); } __attribute__((noinline)) void FUNC2() { int x; USE(&x); FUNC1(); } __attribute__((noinline)) void FUNC3() { int x; USE(&x); FUNC2(); } From 8ddca6b2c19c826244f58081c591a8baba2040ef Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 16 Oct 2023 22:47:12 -0700 Subject: [PATCH 300/720] [CI] Fix documentation build CI job Currently this job fails when trying to install system dependencies as the apt-get commands are not run with sudo, so they don't have the appropriate permissions. This does not occur with act which is why it wasn't caught in the first place. The change has been validated as fixing the problem against my fork. --- .github/workflows/docs.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4af4083a77b8e..5133309eb8cf9 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -36,7 +36,9 @@ jobs: - name: Install python dependencies run: pip install -r llvm/docs/requirements.txt - name: Install system dependencies - run: apt-get update && apt-get install -y cmake ninja-build + run: | + sudo apt-get update + sudo apt-get install -y cmake ninja-build - name: Build docs run: | mkdir build From fbde19a664e5fd7196080fb4ff0aeaa31dce8508 Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Tue, 17 Oct 2023 07:55:00 +0200 Subject: [PATCH 301/720] [MLIR][LLVM] Change addressof builders to use opaque pointers (#69215) This commit changes the builders of the `llvm.mlir.addressof` operations to no longer produce typed pointers. As a consequence, a GPU to NVVM pattern and the toy example LLVM lowerings had to be updated, as they still relied on typed pointers. 
--- mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp | 10 +++---- mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp | 10 +++---- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 4 +-- .../Conversion/GPUCommon/GPUOpsLowering.cpp | 19 ++++++------- .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 28 +++++++++---------- 5 files changed, 34 insertions(+), 37 deletions(-) diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp index 684ce37b2398c..e8c5414f8f387 100644 --- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp @@ -117,8 +117,8 @@ class PrintOpLowering : public ConversionPattern { /// * `i32 (i8*, ...)` static LLVM::LLVMFunctionType getPrintfType(MLIRContext *context) { auto llvmI32Ty = IntegerType::get(context, 32); - auto llvmI8PtrTy = LLVM::LLVMPointerType::get(IntegerType::get(context, 8)); - auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmI8PtrTy, + auto llvmPtrTy = LLVM::LLVMPointerType::get(context); + auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmPtrTy, /*isVarArg=*/true); return llvmFnType; } @@ -162,9 +162,9 @@ class PrintOpLowering : public ConversionPattern { Value cst0 = builder.create(loc, builder.getI64Type(), builder.getIndexAttr(0)); return builder.create( - loc, - LLVM::LLVMPointerType::get(IntegerType::get(builder.getContext(), 8)), - globalPtr, ArrayRef({cst0, cst0})); + loc, LLVM::LLVMPointerType::get(builder.getContext()), + IntegerType::get(builder.getContext(), 8), globalPtr, + ArrayRef({cst0, cst0})); } }; } // namespace diff --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp index 684ce37b2398c..e8c5414f8f387 100644 --- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp @@ -117,8 +117,8 @@ class PrintOpLowering : public ConversionPattern { /// * `i32 (i8*, ...)` static LLVM::LLVMFunctionType getPrintfType(MLIRContext *context) { auto llvmI32Ty = 
IntegerType::get(context, 32); - auto llvmI8PtrTy = LLVM::LLVMPointerType::get(IntegerType::get(context, 8)); - auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmI8PtrTy, + auto llvmPtrTy = LLVM::LLVMPointerType::get(context); + auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmPtrTy, /*isVarArg=*/true); return llvmFnType; } @@ -162,9 +162,9 @@ class PrintOpLowering : public ConversionPattern { Value cst0 = builder.create(loc, builder.getI64Type(), builder.getIndexAttr(0)); return builder.create( - loc, - LLVM::LLVMPointerType::get(IntegerType::get(builder.getContext(), 8)), - globalPtr, ArrayRef({cst0, cst0})); + loc, LLVM::LLVMPointerType::get(builder.getContext()), + IntegerType::get(builder.getContext(), 8), globalPtr, + ArrayRef({cst0, cst0})); } }; } // namespace diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 8745d14c8d483..2a572ab4de706 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1071,7 +1071,7 @@ def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof", CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - LLVM::LLVMPointerType::get(global.getType(), global.getAddrSpace()), + LLVM::LLVMPointerType::get($_builder.getContext(), global.getAddrSpace()), global.getSymName()); $_state.addAttributes(attrs); }]>, @@ -1079,7 +1079,7 @@ def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof", CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - LLVM::LLVMPointerType::get(func.getFunctionType()), func.getName()); + LLVM::LLVMPointerType::get($_builder.getContext()), func.getName()); $_state.addAttributes(attrs); }]> ]; diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 96d8fceba7066..59823c6605fe2 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -441,7 +441,7 @@ LogicalResult 
GPUPrintfOpToVPrintfLowering::matchAndRewrite( Location loc = gpuPrintfOp->getLoc(); mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8)); - mlir::Type i8Ptr = LLVM::LLVMPointerType::get(llvmI8); + mlir::Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext()); // Note: this is the GPUModule op, not the ModuleOp that surrounds it // This ensures that global constants and declarations are placed within @@ -449,7 +449,7 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( auto moduleOp = gpuPrintfOp->getParentOfType(); auto vprintfType = - LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {i8Ptr, i8Ptr}); + LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType, ptrType}); LLVM::LLVMFuncOp vprintfDecl = getOrDefineFunction(moduleOp, loc, rewriter, "vprintf", vprintfType); @@ -473,7 +473,7 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( // Get a pointer to the format string's first element Value globalPtr = rewriter.create(loc, global); Value stringStart = rewriter.create( - loc, i8Ptr, globalPtr, ArrayRef{0, 0}); + loc, ptrType, ptrType, globalPtr, ArrayRef{0, 0}); SmallVector types; SmallVector args; // Promote and pack the arguments into a stack allocation. 
@@ -490,18 +490,17 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( } Type structType = LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types); - Type structPtrType = LLVM::LLVMPointerType::get(structType); Value one = rewriter.create(loc, rewriter.getI64Type(), rewriter.getIndexAttr(1)); - Value tempAlloc = rewriter.create(loc, structPtrType, one, - /*alignment=*/0); + Value tempAlloc = + rewriter.create(loc, ptrType, structType, one, + /*alignment=*/0); for (auto [index, arg] : llvm::enumerate(args)) { - Value ptr = rewriter.create( - loc, LLVM::LLVMPointerType::get(arg.getType()), tempAlloc, - ArrayRef{0, index}); + Value ptr = + rewriter.create(loc, ptrType, arg.getType(), tempAlloc, + ArrayRef{0, index}); rewriter.create(loc, arg, ptr); } - tempAlloc = rewriter.create(loc, i8Ptr, tempAlloc); std::array printfArgs = {stringStart, tempAlloc}; rewriter.create(loc, vprintfDecl, printfArgs); diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index 391ccd74841dc..a33a0797aa565 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -542,16 +542,15 @@ gpu.module @test_module_28 { gpu.module @test_module_29 { // CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL0:[A-Za-z0-9_]+]]("Hello, world\0A\00") // CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL1:[A-Za-z0-9_]+]]("Hello: %d\0A\00") - // CHECK-DAG: llvm.func @vprintf(!llvm.ptr, !llvm.ptr) -> i32 + // CHECK-DAG: llvm.func @vprintf(!llvm.ptr, !llvm.ptr) -> i32 // CHECK-LABEL: func @test_const_printf gpu.func @test_const_printf() { - // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL0]] : !llvm.ptr> - // CHECK-NEXT: %[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr>) -> !llvm.ptr + // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL0]] : !llvm.ptr + // CHECK-NEXT: 
%[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr) -> !llvm.ptr // CHECK-NEXT: %[[O:.*]] = llvm.mlir.constant(1 : index) : i64 - // CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca %[[O]] x !llvm.struct<()> : (i64) -> !llvm.ptr> - // CHECK-NEXT: %[[ARGPTR:.*]] = llvm.bitcast %[[ALLOC]] : !llvm.ptr> to !llvm.ptr - // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ARGPTR]]) : (!llvm.ptr, !llvm.ptr) -> i32 + // CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca %[[O]] x !llvm.struct<()> : (i64) -> !llvm.ptr + // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32 gpu.printf "Hello, world\n" gpu.return } @@ -559,17 +558,16 @@ gpu.module @test_module_29 { // CHECK-LABEL: func @test_printf // CHECK: (%[[ARG0:.*]]: i32, %[[ARG1:.*]]: f32) gpu.func @test_printf(%arg0: i32, %arg1: f32) { - // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL1]] : !llvm.ptr> - // CHECK-NEXT: %[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr>) -> !llvm.ptr + // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL1]] : !llvm.ptr + // CHECK-NEXT: %[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr) -> !llvm.ptr // CHECK-NEXT: %[[EXT:.+]] = llvm.fpext %[[ARG1]] : f32 to f64 // CHECK-NEXT: %[[O:.*]] = llvm.mlir.constant(1 : index) : i64 - // CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca %[[O]] x !llvm.struct<(i32, f64)> : (i64) -> !llvm.ptr> - // CHECK-NEXT: %[[EL0:.*]] = llvm.getelementptr %[[ALLOC]][0, 0] : (!llvm.ptr>) -> !llvm.ptr - // CHECK-NEXT: llvm.store %[[ARG0]], %[[EL0]] : !llvm.ptr - // CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr>) -> !llvm.ptr - // CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : !llvm.ptr - // CHECK-NEXT: %[[ARGPTR:.*]] = llvm.bitcast %[[ALLOC]] : !llvm.ptr> to !llvm.ptr - // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ARGPTR]]) : (!llvm.ptr, !llvm.ptr) -> i32 + // CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca 
%[[O]] x !llvm.struct<(i32, f64)> : (i64) -> !llvm.ptr + // CHECK-NEXT: %[[EL0:.*]] = llvm.getelementptr %[[ALLOC]][0, 0] : (!llvm.ptr) -> !llvm.ptr + // CHECK-NEXT: llvm.store %[[ARG0]], %[[EL0]] : i32, !llvm.ptr + // CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr) -> !llvm.ptr + // CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : f64, !llvm.ptr + // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32 gpu.printf "Hello: %d\n" %arg0, %arg1 : i32, f32 gpu.return } From 9397e5f581b121430f42e0559b87a475abf70c09 Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Tue, 17 Oct 2023 06:31:48 +0000 Subject: [PATCH 302/720] Revert "[MLIR][LLVM] Change addressof builders to use opaque pointers (#69215)" This reverts commit fbde19a664e5fd7196080fb4ff0aeaa31dce8508 due to breaking integration tests. --- mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp | 10 +++---- mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp | 10 +++---- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 4 +-- .../Conversion/GPUCommon/GPUOpsLowering.cpp | 19 +++++++------ .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 28 ++++++++++--------- 5 files changed, 37 insertions(+), 34 deletions(-) diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp index e8c5414f8f387..684ce37b2398c 100644 --- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp @@ -117,8 +117,8 @@ class PrintOpLowering : public ConversionPattern { /// * `i32 (i8*, ...)` static LLVM::LLVMFunctionType getPrintfType(MLIRContext *context) { auto llvmI32Ty = IntegerType::get(context, 32); - auto llvmPtrTy = LLVM::LLVMPointerType::get(context); - auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmPtrTy, + auto llvmI8PtrTy = LLVM::LLVMPointerType::get(IntegerType::get(context, 8)); + auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmI8PtrTy, /*isVarArg=*/true); return llvmFnType; } @@ -162,9 +162,9 
@@ class PrintOpLowering : public ConversionPattern { Value cst0 = builder.create(loc, builder.getI64Type(), builder.getIndexAttr(0)); return builder.create( - loc, LLVM::LLVMPointerType::get(builder.getContext()), - IntegerType::get(builder.getContext(), 8), globalPtr, - ArrayRef({cst0, cst0})); + loc, + LLVM::LLVMPointerType::get(IntegerType::get(builder.getContext(), 8)), + globalPtr, ArrayRef({cst0, cst0})); } }; } // namespace diff --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp index e8c5414f8f387..684ce37b2398c 100644 --- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp @@ -117,8 +117,8 @@ class PrintOpLowering : public ConversionPattern { /// * `i32 (i8*, ...)` static LLVM::LLVMFunctionType getPrintfType(MLIRContext *context) { auto llvmI32Ty = IntegerType::get(context, 32); - auto llvmPtrTy = LLVM::LLVMPointerType::get(context); - auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmPtrTy, + auto llvmI8PtrTy = LLVM::LLVMPointerType::get(IntegerType::get(context, 8)); + auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmI8PtrTy, /*isVarArg=*/true); return llvmFnType; } @@ -162,9 +162,9 @@ class PrintOpLowering : public ConversionPattern { Value cst0 = builder.create(loc, builder.getI64Type(), builder.getIndexAttr(0)); return builder.create( - loc, LLVM::LLVMPointerType::get(builder.getContext()), - IntegerType::get(builder.getContext(), 8), globalPtr, - ArrayRef({cst0, cst0})); + loc, + LLVM::LLVMPointerType::get(IntegerType::get(builder.getContext(), 8)), + globalPtr, ArrayRef({cst0, cst0})); } }; } // namespace diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 2a572ab4de706..8745d14c8d483 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1071,7 +1071,7 @@ def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof", CArg<"ArrayRef", 
"{}">:$attrs), [{ build($_builder, $_state, - LLVM::LLVMPointerType::get($_builder.getContext(), global.getAddrSpace()), + LLVM::LLVMPointerType::get(global.getType(), global.getAddrSpace()), global.getSymName()); $_state.addAttributes(attrs); }]>, @@ -1079,7 +1079,7 @@ def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof", CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - LLVM::LLVMPointerType::get($_builder.getContext()), func.getName()); + LLVM::LLVMPointerType::get(func.getFunctionType()), func.getName()); $_state.addAttributes(attrs); }]> ]; diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 59823c6605fe2..96d8fceba7066 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -441,7 +441,7 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( Location loc = gpuPrintfOp->getLoc(); mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8)); - mlir::Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext()); + mlir::Type i8Ptr = LLVM::LLVMPointerType::get(llvmI8); // Note: this is the GPUModule op, not the ModuleOp that surrounds it // This ensures that global constants and declarations are placed within @@ -449,7 +449,7 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( auto moduleOp = gpuPrintfOp->getParentOfType(); auto vprintfType = - LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType, ptrType}); + LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {i8Ptr, i8Ptr}); LLVM::LLVMFuncOp vprintfDecl = getOrDefineFunction(moduleOp, loc, rewriter, "vprintf", vprintfType); @@ -473,7 +473,7 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( // Get a pointer to the format string's first element Value globalPtr = rewriter.create(loc, global); Value stringStart = rewriter.create( - loc, ptrType, ptrType, globalPtr, ArrayRef{0, 0}); + loc, i8Ptr, globalPtr, ArrayRef{0, 
0}); SmallVector types; SmallVector args; // Promote and pack the arguments into a stack allocation. @@ -490,17 +490,18 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( } Type structType = LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types); + Type structPtrType = LLVM::LLVMPointerType::get(structType); Value one = rewriter.create(loc, rewriter.getI64Type(), rewriter.getIndexAttr(1)); - Value tempAlloc = - rewriter.create(loc, ptrType, structType, one, - /*alignment=*/0); + Value tempAlloc = rewriter.create(loc, structPtrType, one, + /*alignment=*/0); for (auto [index, arg] : llvm::enumerate(args)) { - Value ptr = - rewriter.create(loc, ptrType, arg.getType(), tempAlloc, - ArrayRef{0, index}); + Value ptr = rewriter.create( + loc, LLVM::LLVMPointerType::get(arg.getType()), tempAlloc, + ArrayRef{0, index}); rewriter.create(loc, arg, ptr); } + tempAlloc = rewriter.create(loc, i8Ptr, tempAlloc); std::array printfArgs = {stringStart, tempAlloc}; rewriter.create(loc, vprintfDecl, printfArgs); diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index a33a0797aa565..391ccd74841dc 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -542,15 +542,16 @@ gpu.module @test_module_28 { gpu.module @test_module_29 { // CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL0:[A-Za-z0-9_]+]]("Hello, world\0A\00") // CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL1:[A-Za-z0-9_]+]]("Hello: %d\0A\00") - // CHECK-DAG: llvm.func @vprintf(!llvm.ptr, !llvm.ptr) -> i32 + // CHECK-DAG: llvm.func @vprintf(!llvm.ptr, !llvm.ptr) -> i32 // CHECK-LABEL: func @test_const_printf gpu.func @test_const_printf() { - // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL0]] : !llvm.ptr - // CHECK-NEXT: %[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr) -> !llvm.ptr + // CHECK-NEXT: 
%[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL0]] : !llvm.ptr> + // CHECK-NEXT: %[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr>) -> !llvm.ptr // CHECK-NEXT: %[[O:.*]] = llvm.mlir.constant(1 : index) : i64 - // CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca %[[O]] x !llvm.struct<()> : (i64) -> !llvm.ptr - // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32 + // CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca %[[O]] x !llvm.struct<()> : (i64) -> !llvm.ptr> + // CHECK-NEXT: %[[ARGPTR:.*]] = llvm.bitcast %[[ALLOC]] : !llvm.ptr> to !llvm.ptr + // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ARGPTR]]) : (!llvm.ptr, !llvm.ptr) -> i32 gpu.printf "Hello, world\n" gpu.return } @@ -558,16 +559,17 @@ gpu.module @test_module_29 { // CHECK-LABEL: func @test_printf // CHECK: (%[[ARG0:.*]]: i32, %[[ARG1:.*]]: f32) gpu.func @test_printf(%arg0: i32, %arg1: f32) { - // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL1]] : !llvm.ptr - // CHECK-NEXT: %[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr) -> !llvm.ptr + // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL1]] : !llvm.ptr> + // CHECK-NEXT: %[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr>) -> !llvm.ptr // CHECK-NEXT: %[[EXT:.+]] = llvm.fpext %[[ARG1]] : f32 to f64 // CHECK-NEXT: %[[O:.*]] = llvm.mlir.constant(1 : index) : i64 - // CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca %[[O]] x !llvm.struct<(i32, f64)> : (i64) -> !llvm.ptr - // CHECK-NEXT: %[[EL0:.*]] = llvm.getelementptr %[[ALLOC]][0, 0] : (!llvm.ptr) -> !llvm.ptr - // CHECK-NEXT: llvm.store %[[ARG0]], %[[EL0]] : i32, !llvm.ptr - // CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr) -> !llvm.ptr - // CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : f64, !llvm.ptr - // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32 + // CHECK-NEXT: %[[ALLOC:.*]] = 
llvm.alloca %[[O]] x !llvm.struct<(i32, f64)> : (i64) -> !llvm.ptr> + // CHECK-NEXT: %[[EL0:.*]] = llvm.getelementptr %[[ALLOC]][0, 0] : (!llvm.ptr>) -> !llvm.ptr + // CHECK-NEXT: llvm.store %[[ARG0]], %[[EL0]] : !llvm.ptr + // CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr>) -> !llvm.ptr + // CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : !llvm.ptr + // CHECK-NEXT: %[[ARGPTR:.*]] = llvm.bitcast %[[ALLOC]] : !llvm.ptr> to !llvm.ptr + // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ARGPTR]]) : (!llvm.ptr, !llvm.ptr) -> i32 gpu.printf "Hello: %d\n" %arg0, %arg1 : i32, f32 gpu.return } From 041a786c78fbcee3537ca636bf796bb18fb6f313 Mon Sep 17 00:00:00 2001 From: Zhaoxuan Jiang Date: Tue, 17 Oct 2023 14:34:04 +0800 Subject: [PATCH 303/720] [AArch64] Fix pairing different types of registers when computing CSRs. (#66642) If a function has odd number of same type of registers to save, and the calling convention also requires odd number of such type of CSRs, an FP register would be accidentally marked as saved when producePairRegisters returns true. This patch also fixes the AArch64LowerHomogeneousPrologEpilog pass not handling AArch64::NoRegister; actually this pass must be fixed along with the register pairing so i can write a test for it. 
--- .../Target/AArch64/AArch64FrameLowering.cpp | 62 +++++++++++-- .../AArch64LowerHomogeneousPrologEpilog.cpp | 91 +++++++++++++++---- ...rm64-homogeneous-prolog-epilog-odd-csrs.ll | 31 +++++++ 3 files changed, 160 insertions(+), 24 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index e68d67c6e78de..880de7d0306a7 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -330,6 +330,23 @@ bool AArch64FrameLowering::homogeneousPrologEpilog( if (AFI->hasSwiftAsyncContext()) return false; + // If there are an odd number of GPRs before LR and FP in the CSRs list, + // they will not be paired into one RegPairInfo, which is incompatible with + // the assumption made by the homogeneous prolog epilog pass. + const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); + unsigned NumGPRs = 0; + for (unsigned I = 0; CSRegs[I]; ++I) { + Register Reg = CSRegs[I]; + if (Reg == AArch64::LR) { + assert(CSRegs[I + 1] == AArch64::FP); + if (NumGPRs % 2 != 0) + return false; + break; + } + if (AArch64::GPR64RegClass.contains(Reg)) + ++NumGPRs; + } + return true; } @@ -2750,7 +2767,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( // Update register live in. if (!MRI.isReserved(RPI.Reg1)) MBB.addLiveIn(RPI.Reg1); - if (!MRI.isReserved(RPI.Reg2)) + if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2)) MBB.addLiveIn(RPI.Reg2); } return true; @@ -3000,6 +3017,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, : (unsigned)AArch64::NoRegister; unsigned ExtraCSSpill = 0; + bool HasUnpairedGPR64 = false; // Figure out which callee-saved registers to save/restore. 
for (unsigned i = 0; CSRegs[i]; ++i) { const unsigned Reg = CSRegs[i]; @@ -3010,10 +3028,29 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, bool RegUsed = SavedRegs.test(Reg); unsigned PairedReg = AArch64::NoRegister; - if (AArch64::GPR64RegClass.contains(Reg) || - AArch64::FPR64RegClass.contains(Reg) || - AArch64::FPR128RegClass.contains(Reg)) - PairedReg = CSRegs[i ^ 1]; + const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg); + if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) || + AArch64::FPR128RegClass.contains(Reg)) { + // Compensate for odd numbers of GP CSRs. + // For now, all the known cases of odd number of CSRs are of GPRs. + if (HasUnpairedGPR64) + PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1]; + else + PairedReg = CSRegs[i ^ 1]; + } + + // If the function requires all the GP registers to save (SavedRegs), + // and there are an odd number of GP CSRs at the same time (CSRegs), + // PairedReg could be in a different register class from Reg, which would + // lead to a FPR (usually D8) accidentally being marked saved. + if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) { + PairedReg = AArch64::NoRegister; + HasUnpairedGPR64 = true; + } + assert(PairedReg == AArch64::NoRegister || + AArch64::GPR64RegClass.contains(Reg, PairedReg) || + AArch64::FPR64RegClass.contains(Reg, PairedReg) || + AArch64::FPR128RegClass.contains(Reg, PairedReg)); if (!RegUsed) { if (AArch64::GPR64RegClass.contains(Reg) && @@ -3112,12 +3149,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo) << " to get a scratch register.\n"); SavedRegs.set(UnspilledCSGPR); + ExtraCSSpill = UnspilledCSGPR; + // MachO's compact unwind format relies on all registers being stored in // pairs, so if we need to spill one extra for BigStack, then we need to // store the pair. 
- if (producePairRegisters(MF)) - SavedRegs.set(UnspilledCSGPRPaired); - ExtraCSSpill = UnspilledCSGPR; + if (producePairRegisters(MF)) { + if (UnspilledCSGPRPaired == AArch64::NoRegister) { + // Failed to make a pair for compact unwind format, revert spilling. + if (produceCompactUnwindFrame(MF)) { + SavedRegs.reset(UnspilledCSGPR); + ExtraCSSpill = AArch64::NoRegister; + } + } else + SavedRegs.set(UnspilledCSGPRPaired); + } } // If we didn't find an extra callee-saved register to spill, create diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp index d054fe509be0b..4ebe1c9e0e660 100644 --- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp +++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp @@ -146,8 +146,11 @@ static std::string getFrameHelperName(SmallVectorImpl &Regs, break; } - for (auto Reg : Regs) + for (auto Reg : Regs) { + if (Reg == AArch64::NoRegister) + continue; RegStream << AArch64InstPrinter::getRegisterName(Reg); + } return RegStream.str(); } @@ -195,46 +198,84 @@ static MachineFunction &createFrameHelperMachineFunction(Module *M, } /// Emit a store-pair instruction for frame-setup. +/// If Reg2 is AArch64::NoRegister, emit STR instead. static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPreDec) { + assert(Reg1 != AArch64::NoRegister); + const bool IsPaired = Reg2 != AArch64::NoRegister; bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); unsigned Opc; - if (IsPreDec) - Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre; - else - Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi; + if (IsPreDec) { + if (IsFloat) + Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre; + else + Opc = IsPaired ? 
AArch64::STPXpre : AArch64::STRXpre; + } else { + if (IsFloat) + Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui; + else + Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui; + } + // The implicit scale for Offset is 8. + TypeSize Scale(0U, false); + unsigned Width; + int64_t MinOffset, MaxOffset; + bool Success = + AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); + assert(Success && "Invalid Opcode"); + Offset *= (8 / (int)Scale); MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); if (IsPreDec) MIB.addDef(AArch64::SP); - MIB.addReg(Reg2) - .addReg(Reg1) + if (IsPaired) + MIB.addReg(Reg2); + MIB.addReg(Reg1) .addReg(AArch64::SP) .addImm(Offset) .setMIFlag(MachineInstr::FrameSetup); } /// Emit a load-pair instruction for frame-destroy. +/// If Reg2 is AArch64::NoRegister, emit LDR instead. static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPostDec) { + assert(Reg1 != AArch64::NoRegister); + const bool IsPaired = Reg2 != AArch64::NoRegister; bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); unsigned Opc; - if (IsPostDec) - Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost; - else - Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi; + if (IsPostDec) { + if (IsFloat) + Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost; + else + Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost; + } else { + if (IsFloat) + Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui; + else + Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui; + } + // The implicit scale for Offset is 8. 
+ TypeSize Scale(0U, false); + unsigned Width; + int64_t MinOffset, MaxOffset; + bool Success = + AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); + assert(Success && "Invalid Opcode"); + Offset *= (8 / (int)Scale); MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); if (IsPostDec) MIB.addDef(AArch64::SP); - MIB.addReg(Reg2, getDefRegState(true)) - .addReg(Reg1, getDefRegState(true)) + if (IsPaired) + MIB.addReg(Reg2, getDefRegState(true)); + MIB.addReg(Reg1, getDefRegState(true)) .addReg(AArch64::SP) .addImm(Offset) .setMIFlag(MachineInstr::FrameDestroy); @@ -433,9 +474,18 @@ bool AArch64LowerHomogeneousPE::lowerEpilog( DebugLoc DL = MI.getDebugLoc(); SmallVector Regs; + bool HasUnpairedReg = false; for (auto &MO : MI.operands()) - if (MO.isReg()) + if (MO.isReg()) { + if (!MO.getReg().isValid()) { + // For now we are only expecting unpaired GP registers which should + // occur exactly once. + assert(!HasUnpairedReg); + HasUnpairedReg = true; + } Regs.push_back(MO.getReg()); + } + (void)HasUnpairedReg; int Size = (int)Regs.size(); if (Size == 0) return false; @@ -507,17 +557,26 @@ bool AArch64LowerHomogeneousPE::lowerProlog( DebugLoc DL = MI.getDebugLoc(); SmallVector Regs; + bool HasUnpairedReg = false; int LRIdx = 0; std::optional FpOffset; for (auto &MO : MI.operands()) { if (MO.isReg()) { - if (MO.getReg() == AArch64::LR) - LRIdx = Regs.size(); + if (MO.getReg().isValid()) { + if (MO.getReg() == AArch64::LR) + LRIdx = Regs.size(); + } else { + // For now we are only expecting unpaired GP registers which should + // occur exactly once. 
+ assert(!HasUnpairedReg); + HasUnpairedReg = true; + } Regs.push_back(MO.getReg()); } else if (MO.isImm()) { FpOffset = MO.getImm(); } } + (void)HasUnpairedReg; int Size = (int)Regs.size(); if (Size == 0) return false; diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll new file mode 100644 index 0000000000000..3b90163e6d295 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -homogeneous-prolog-epilog | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -homogeneous-prolog-epilog | FileCheck %s --check-prefixes=CHECK-LINUX + +declare void @bar(i32 %i) + +define void @odd_num_callee_saved_registers(ptr swifterror %error, i32 %i) nounwind minsize { + call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x20},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind + call void @bar(i32 %i) + ret void +} + +define void @odd_num_callee_saved_registers_with_fpr(ptr swifterror %error, i32 %i) nounwind minsize { + call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x20},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{d8},~{d9}"() nounwind + call void @bar(i32 %i) + ret void +} + +; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28: +; CHECK: str x28, [sp, #-80]! +; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x22x23x24x25x26x27x28: +; CHECK: ldr x28, [sp], #96 + +; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28d8d9: +; CHECK: stp d9, d8, [sp, #-96]! 
+; CHECK: str x28, [sp, #16] +; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x22x23x24x25x26x27x28d8d9 +; CHECK: ldr x28, [sp, #16] +; CHECK: ldp d9, d8, [sp], #112 + +; CHECK-LINUX-NOT: OUTLINED_FUNCTION_PROLOG +; CHECK-LINUX-NOT: OUTLINED_FUNCTION_EPILOG From 8e674e8a01314597770563041b61cc5c85680d32 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Tue, 17 Oct 2023 08:48:14 +0200 Subject: [PATCH 304/720] [flang] Deallocate INTENT(OUT) dummy allocatable components (#69164) Non POINTER/ALLOCATABLE INTENT(OUT) dummy arguments with allocatable components were reset without a proper deallocation if needed. Add a call to Destroy runtime to deallocate the components on entry. Notes: 1. The same logic is not needed on the callee side of BIND(C) call because BIND(C) arguments cannot be derived type with allocatable components (C1806). 2. When the argument is an INTENT(OUT) polymorphic, the dynamic type of the actual may contain allocatable components. This case is covered by the call to Destroy that uses dynamic type and was already inserted for INTENT(OUT) polymorphic dummies. --- flang/lib/Lower/ConvertVariable.cpp | 8 ++++- .../intentout-allocatable-components.f90 | 32 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/HLFIR/intentout-allocatable-components.f90 diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 46a59b38ae6ab..895ae2451125d 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -714,7 +714,10 @@ needDummyIntentoutFinalization(const Fortran::lower::pft::Variable &var) { return true; // Intent(out) dummies must be finalized at runtime if their type has a // finalization. - return hasFinalization(sym); + // Allocatable components of INTENT(OUT) dummies must be deallocated (9.7.3.2 + // p6). Calling finalization runtime for this works even if the components + // have no final procedures. 
+ return hasFinalization(sym) || hasAllocatableDirectComponent(sym); } /// Call default initialization runtime routine to initialize \p var. @@ -747,6 +750,9 @@ static void finalizeAtRuntime(Fortran::lower::AbstractConverter &converter, // is deallocated; any allocated allocatable object that is a subobject of an // actual argument corresponding to an INTENT(OUT) dummy argument is // deallocated. +// Note that allocatable components of non-ALLOCATABLE INTENT(OUT) dummy +// arguments are dealt with needDummyIntentoutFinalization (finalization runtime +// is called to reach the intended component deallocation effect). static void deallocateIntentOut(Fortran::lower::AbstractConverter &converter, const Fortran::lower::pft::Variable &var, Fortran::lower::SymMap &symMap) { diff --git a/flang/test/Lower/HLFIR/intentout-allocatable-components.f90 b/flang/test/Lower/HLFIR/intentout-allocatable-components.f90 new file mode 100644 index 0000000000000..932fafd322a3e --- /dev/null +++ b/flang/test/Lower/HLFIR/intentout-allocatable-components.f90 @@ -0,0 +1,32 @@ +! Test that allocatable components of non pointer/non allocatable INTENT(OUT) +! dummy arguments are deallocated. +! RUN: bbc -emit-hlfir -polymorphic-type %s -o - -I nowhere | FileCheck %s + +subroutine test_intentout_component_deallocate(a) + type :: t + integer, allocatable :: x + end type + type(t), intent(out) :: a +end subroutine +! CHECK-LABEL: func.func @_QPtest_intentout_component_deallocate( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>}>> +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_intentout_component_deallocateEa"} +! CHECK: %[[VAL_2:.*]] = fir.embox %[[VAL_1]]#1 : (!fir.ref>}>>) -> !fir.box>}>> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.box>}>>) -> !fir.box +! 
CHECK: %[[VAL_4:.*]] = fir.call @_FortranADestroy(%[[VAL_3]]) fastmath : (!fir.box) -> none + +subroutine test_intentout_optional_component_deallocate(a) + type :: t + integer, allocatable :: x + end type + type(t), optional, intent(out) :: a +end subroutine +! CHECK-LABEL: func.func @_QPtest_intentout_optional_component_deallocate( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>}>> +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_intentout_optional_component_deallocateEa"} +! CHECK: %[[VAL_2:.*]] = fir.is_present %[[VAL_1]]#1 : (!fir.ref>}>>) -> i1 +! CHECK: fir.if %[[VAL_2]] { +! CHECK: %[[VAL_3:.*]] = fir.embox %[[VAL_1]]#1 : (!fir.ref>}>>) -> !fir.box>}>> +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (!fir.box>}>>) -> !fir.box +! CHECK: %[[VAL_5:.*]] = fir.call @_FortranADestroy(%[[VAL_4]]) fastmath : (!fir.box) -> none +! CHECK: } From bef3e8ea6d241a7e249410e85cff36cddfa98720 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Tue, 17 Oct 2023 08:49:43 +0200 Subject: [PATCH 305/720] [flang][runtime] Fix another IsContiguous edge case (#69199) A recent PR addressed zero and one element edge cases but did not cover another case where the descriptors of arrays with more than two elements may have byte strides that are not perfect multiples, like when creating a descriptor for A(:, 1:1:2). In general, the byte stride in a dimension is only meaningful if that dimension has more than one element. Update IsContiguous and CFI_is_contiguous to reflect that. 
--- flang/include/flang/Runtime/descriptor.h | 10 ++++-- flang/runtime/ISO_Fortran_binding.cpp | 9 +++--- .../Evaluate/ISO-Fortran-binding.cpp | 32 +++++++++++++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h index c69bb336dd29e..85240353e8ae9 100644 --- a/flang/include/flang/Runtime/descriptor.h +++ b/flang/include/flang/Runtime/descriptor.h @@ -393,13 +393,17 @@ class Descriptor { bool stridesAreContiguous{true}; for (int j{0}; j < leadingDimensions; ++j) { const Dimension &dim{GetDimension(j)}; - stridesAreContiguous &= bytes == dim.ByteStride(); + stridesAreContiguous &= (bytes == dim.ByteStride()) | (dim.Extent() == 1); bytes *= dim.Extent(); } // One and zero element arrays are contiguous even if the descriptor // byte strides are not perfect multiples. - return stridesAreContiguous || bytes == 0 || - bytes == static_cast(ElementBytes()); + // Arrays with more than 2 elements may also be contiguous even if a + // byte stride in one dimension is not a perfect multiple, as long as + // this is the last dimension, or if the dimension has one extent and + // the following dimension have either one extents or contiguous byte + // strides. + return stridesAreContiguous || bytes == 0; } // Establishes a pointer to a section or element. diff --git a/flang/runtime/ISO_Fortran_binding.cpp b/flang/runtime/ISO_Fortran_binding.cpp index 103413cb7140a..c2e82758ae08a 100644 --- a/flang/runtime/ISO_Fortran_binding.cpp +++ b/flang/runtime/ISO_Fortran_binding.cpp @@ -125,16 +125,15 @@ RT_API_ATTRS int CFI_establish(CFI_cdesc_t *descriptor, void *base_addr, } RT_API_ATTRS int CFI_is_contiguous(const CFI_cdesc_t *descriptor) { + // See Descriptor::IsContiguous for the rationale. 
bool stridesAreContiguous{true}; CFI_index_t bytes = descriptor->elem_len; for (int j{0}; j < descriptor->rank; ++j) { - stridesAreContiguous &= bytes == descriptor->dim[j].sm; + stridesAreContiguous &= + (bytes == descriptor->dim[j].sm) | (descriptor->dim[j].extent == 1); bytes *= descriptor->dim[j].extent; } - // One and zero element arrays are contiguous even if the descriptor - // byte strides are not perfect multiples. - if (stridesAreContiguous || bytes == 0 || - bytes == static_cast(descriptor->elem_len)) { + if (stridesAreContiguous || bytes == 0) { return 1; } return 0; diff --git a/flang/unittests/Evaluate/ISO-Fortran-binding.cpp b/flang/unittests/Evaluate/ISO-Fortran-binding.cpp index d1f0a31454056..3c98363f90046 100644 --- a/flang/unittests/Evaluate/ISO-Fortran-binding.cpp +++ b/flang/unittests/Evaluate/ISO-Fortran-binding.cpp @@ -736,6 +736,38 @@ static void run_CFI_is_contiguous_tests() { MATCH(true, retCode == CFI_SUCCESS); MATCH(true, CFI_is_contiguous(section) == 0); MATCH(false, sectionDesc->IsContiguous()); + + // Test section B = A(0:3:1,0:0:2) is contiguous. + lb[0] = 0; + lb[1] = 0; + ub[0] = 3; + ub[1] = 0; + strides[0] = 1; + strides[1] = 2; + retCode = CFI_section(section, dv, lb, ub, strides); + MATCH(true, retCode == CFI_SUCCESS); + MATCH(true, CFI_is_contiguous(section) == 1); + MATCH(true, sectionDesc->IsContiguous()); + + // INTEGER :: C(0:0, 0:3) + CFI_index_t c_extents[rank] = {1, 4}; + CFI_CDESC_T(rank) c_dv_storage; + CFI_cdesc_t *cdv{&c_dv_storage}; + retCode = CFI_establish(cdv, base_addr, CFI_attribute_other, CFI_type_int, + /*elem_len=*/0, rank, c_extents); + MATCH(retCode == CFI_SUCCESS, true); + + // Test section B = C(0:0:2, 0:3:1) is contiguous. 
+ lb[0] = 0; + lb[1] = 0; + ub[0] = 0; + ub[1] = 3; + strides[0] = 2; + strides[1] = 1; + retCode = CFI_section(section, cdv, lb, ub, strides); + MATCH(true, retCode == CFI_SUCCESS); + MATCH(true, CFI_is_contiguous(section) == 1); + MATCH(true, sectionDesc->IsContiguous()); } int main() { From 77ab08e1ffa875f0e739357b81cdb197ff19ecb0 Mon Sep 17 00:00:00 2001 From: Jean Perier Date: Mon, 16 Oct 2023 23:59:15 -0700 Subject: [PATCH 306/720] [flang][runtime] fix buildbot failure after #69199 Fix https://lab.llvm.org/buildbot/#/builders/268/builds/360 --- flang/include/flang/Runtime/descriptor.h | 3 ++- flang/runtime/ISO_Fortran_binding.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h index 85240353e8ae9..a5747f98ff2bd 100644 --- a/flang/include/flang/Runtime/descriptor.h +++ b/flang/include/flang/Runtime/descriptor.h @@ -393,7 +393,8 @@ class Descriptor { bool stridesAreContiguous{true}; for (int j{0}; j < leadingDimensions; ++j) { const Dimension &dim{GetDimension(j)}; - stridesAreContiguous &= (bytes == dim.ByteStride()) | (dim.Extent() == 1); + stridesAreContiguous &= + (bytes == dim.ByteStride()) || (dim.Extent() == 1); bytes *= dim.Extent(); } // One and zero element arrays are contiguous even if the descriptor diff --git a/flang/runtime/ISO_Fortran_binding.cpp b/flang/runtime/ISO_Fortran_binding.cpp index c2e82758ae08a..ce146844533a0 100644 --- a/flang/runtime/ISO_Fortran_binding.cpp +++ b/flang/runtime/ISO_Fortran_binding.cpp @@ -130,7 +130,7 @@ RT_API_ATTRS int CFI_is_contiguous(const CFI_cdesc_t *descriptor) { CFI_index_t bytes = descriptor->elem_len; for (int j{0}; j < descriptor->rank; ++j) { stridesAreContiguous &= - (bytes == descriptor->dim[j].sm) | (descriptor->dim[j].extent == 1); + (bytes == descriptor->dim[j].sm) || (descriptor->dim[j].extent == 1); bytes *= descriptor->dim[j].extent; } if (stridesAreContiguous || bytes == 0) { From 
bfcd05317d0fbe90474eda13a4dbf33c2cee4130 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Tue, 17 Oct 2023 09:11:53 +0200 Subject: [PATCH 307/720] [flang][hlfir] Do not emit extra declare for dummy used in BLOCK (#69184) When a variable is used in a specification expression in a scope, it is added to the list of variables that must be instantiated when lowering the scope. When lowering a BLOCK, this caused instantiateVar to be called again on all the host block variables appearing in block variable specification expressions. This caused an extra declare to be emitted for dummy inside block (for non dummy, instantiateVar is a no-op if the symbol is already mapped). Only call instantiateVar if the symbol is not mapped when lowering BLOCK variables. --- flang/lib/Lower/Bridge.cpp | 8 ++++-- .../Lower/HLFIR/convert-variable-block.f90 | 25 +++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 flang/test/Lower/HLFIR/convert-variable-block.f90 diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index ef8540c35a372..f26a1aaf0236f 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2610,8 +2610,12 @@ class FirConverter : public Fortran::lower::AbstractConverter { scopeBlockIdMap.try_emplace(&scope, ++blockId); Fortran::lower::AggregateStoreMap storeMap; for (const Fortran::lower::pft::Variable &var : - Fortran::lower::pft::getScopeVariableList(scope)) - instantiateVar(var, storeMap); + Fortran::lower::pft::getScopeVariableList(scope)) { + // Do no instantiate again variables from the block host + // that appears in specification of block variables. 
+ if (!var.hasSymbol() || !lookupSymbol(var.getSymbol())) + instantiateVar(var, storeMap); + } } else if (e.getIf()) { if (eval.lowerAsUnstructured()) maybeStartBlock(e.block); diff --git a/flang/test/Lower/HLFIR/convert-variable-block.f90 b/flang/test/Lower/HLFIR/convert-variable-block.f90 new file mode 100644 index 0000000000000..30f8eacaaed17 --- /dev/null +++ b/flang/test/Lower/HLFIR/convert-variable-block.f90 @@ -0,0 +1,25 @@ +! Test that hlfir.declare is not created again for dummy arguments +! used in specifications of BLOCK variables. +! RUN: bbc -emit-hlfir %s -o - | FileCheck %s + +subroutine test(n) + integer(8) :: n + call before_block() + block + real :: x(n) + call foo(x) + end block +end subroutine +! CHECK-LABEL: func.func @_QPtest( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "n"}) { +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFtestEn"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: fir.call @_QPbefore_block() {{.*}}: () -> () +! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_6:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_5]] : index +! CHECK: %[[VAL_7:.*]] = arith.select %[[VAL_6]], %[[VAL_4]], %[[VAL_5]] : index +! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.array, %[[VAL_7]] {bindc_name = "x", uniq_name = "_QFtestB1Ex"} +! CHECK: %[[VAL_9:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_9]]) {uniq_name = "_QFtestB1Ex"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! 
CHECK: fir.call @_QPfoo(%[[VAL_10]]#1) {{.*}}: (!fir.ref>) -> () From cbf7d5f82b72d40770050c29d28a67a71497dac9 Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Tue, 17 Oct 2023 15:23:46 +0800 Subject: [PATCH 308/720] [AArch64] Fix -Wunused-variable in AArch64LowerHomogeneousPrologEpilog.cpp (NFC) /llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp:268:8: error: unused variable 'Success' [-Werror,-Wunused-variable] bool Success = ^ 2 errors generated. --- .../Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp index 4ebe1c9e0e660..b8b74ae8404d3 100644 --- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp +++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp @@ -223,7 +223,7 @@ static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, TypeSize Scale(0U, false); unsigned Width; int64_t MinOffset, MaxOffset; - bool Success = + [[maybe_unused]] bool Success = AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); assert(Success && "Invalid Opcode"); Offset *= (8 / (int)Scale); @@ -265,7 +265,7 @@ static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, TypeSize Scale(0U, false); unsigned Width; int64_t MinOffset, MaxOffset; - bool Success = + [[maybe_unused]] bool Success = AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); assert(Success && "Invalid Opcode"); Offset *= (8 / (int)Scale); From 4606712ef5b422edbe3799b665dcad7dcf348b90 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 17 Oct 2023 09:01:07 +0100 Subject: [PATCH 309/720] [lldb][lldb-vscode] Add example configuration for connecting to a remote gdbserver (#68866) This can be used to have VS Code debug various emulators, remote systems, hardware probes, etc. 
In my case I was doing this for the Gameboy Advance, https://github.com/stuij/gba-llvm-devkit/blob/main/docs/Debugging.md#debugging-using-visual-studio-code. It's not very complex if you know LLDB well, but when using another plugin, CodeLLDB, I was very glad that they had an example for it. So we should have one too. --- lldb/tools/lldb-vscode/README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/lldb/tools/lldb-vscode/README.md b/lldb/tools/lldb-vscode/README.md index 6f930293126d5..078129026cb0c 100644 --- a/lldb/tools/lldb-vscode/README.md +++ b/lldb/tools/lldb-vscode/README.md @@ -212,6 +212,38 @@ This loads the coredump file `/cores/123.core` associated with the program } ``` +### Connect to a Debug Server on the Current Machine + +This connects to a debug server (e.g. `lldb-server`, `gdbserver`) on +the current machine, that is debugging the program `/tmp/a.out` and listening +locally on port `2345`. + +```javascript +{ + "name": "Local Debug Server", + "type": "lldb-vscode", + "request": "attach", + "program": "/tmp/a.out", + "attachCommands": ["gdb-remote 2345"], +} +``` + +### Connect to a Debug Server on Another Machine + +This connects to a debug server running on another machine with hostname +`hostnmame`. Which is debugging the program `/tmp/a.out` and listening on +port `5678` of that other machine. 
+ +```javascript +{ + "name": "Remote Debug Server", + "type": "lldb-vscode", + "request": "attach", + "program": "/tmp/a.out", + "attachCommands": ["gdb-remote hostname:5678"], +} +``` + # Custom debugger commands The `lldb-vscode` tool includes additional custom commands to support the Debug From e483673246bdee06e54ec06fd04236bc9fee7f63 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 17 Oct 2023 08:03:50 +0000 Subject: [PATCH 310/720] [compiler-rt][HWASAN] Add missing include in deep-recursion.c test --- compiler-rt/test/hwasan/TestCases/deep-recursion.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-rt/test/hwasan/TestCases/deep-recursion.c b/compiler-rt/test/hwasan/TestCases/deep-recursion.c index 792f758958270..19d2b50726bee 100644 --- a/compiler-rt/test/hwasan/TestCases/deep-recursion.c +++ b/compiler-rt/test/hwasan/TestCases/deep-recursion.c @@ -17,7 +17,9 @@ // Stack histories are currently not recorded on x86. // XFAIL: target=x86_64{{.*}} +#include #include + // At least -O1 is needed for this function to not have a stack frame on // AArch64. 
void USE(void *x) { // pretend_to_do_something(void *x) From 11f5e5eb90c883d4b9ddba318e8fc57914b22ef3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Tue, 17 Oct 2023 10:05:37 +0200 Subject: [PATCH 311/720] [clang][Interp][NFC] Add thread_local tests --- clang/test/AST/Interp/cxx23.cpp | 84 +++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 clang/test/AST/Interp/cxx23.cpp diff --git a/clang/test/AST/Interp/cxx23.cpp b/clang/test/AST/Interp/cxx23.cpp new file mode 100644 index 0000000000000..e284a66626fb3 --- /dev/null +++ b/clang/test/AST/Interp/cxx23.cpp @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref20 %s +// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=ref23 %s +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=expected23 %s -fexperimental-new-constant-interpreter + + +// expected23-no-diagnostics + + +/// FIXME: The new interpreter is missing all the 'control flows through...' diagnostics. 
+ +constexpr int f(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ + // ref23-error {{constexpr function never produces a constant expression}} + static const int m = n; // ref20-note {{control flows through the definition of a static variable}} \ + // ref20-warning {{is a C++23 extension}} \ + // ref23-note {{control flows through the definition of a static variable}} \ + // expected20-warning {{is a C++23 extension}} + + return m; +} +constexpr int g(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ + // ref23-error {{constexpr function never produces a constant expression}} + thread_local const int m = n; // ref20-note {{control flows through the definition of a thread_local variable}} \ + // ref20-warning {{is a C++23 extension}} \ + // ref23-note {{control flows through the definition of a thread_local variable}} \ + // expected20-warning {{is a C++23 extension}} + return m; +} + +constexpr int c_thread_local(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ + // ref23-error {{constexpr function never produces a constant expression}} + static _Thread_local int m = 0; // ref20-note {{control flows through the definition of a thread_local variable}} \ + // ref20-warning {{is a C++23 extension}} \ + // ref23-note {{control flows through the definition of a thread_local variable}} \ + // expected20-warning {{is a C++23 extension}} + return m; +} + + +constexpr int gnu_thread_local(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ + // ref23-error {{constexpr function never produces a constant expression}} + static __thread int m = 0; // ref20-note {{control flows through the definition of a thread_local variable}} \ + // ref20-warning {{is a C++23 extension}} \ + // ref23-note {{control flows through the definition of a thread_local variable}} \ + // expected20-warning {{is a C++23 extension}} + return m; +} + +constexpr 
int h(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ + // ref23-error {{constexpr function never produces a constant expression}} + static const int m = n; // ref20-note {{control flows through the definition of a static variable}} \ + // ref20-warning {{is a C++23 extension}} \ + // ref23-note {{control flows through the definition of a static variable}} \ + // expected20-warning {{is a C++23 extension}} + return &m - &m; +} + +constexpr int i(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ + // ref23-error {{constexpr function never produces a constant expression}} + thread_local const int m = n; // ref20-note {{control flows through the definition of a thread_local variable}} \ + // ref20-warning {{is a C++23 extension}} \ + // ref23-note {{control flows through the definition of a thread_local variable}} \ + // expected20-warning {{is a C++23 extension}} + return &m - &m; +} + +constexpr int j(int n) { + if (!n) + return 0; + static const int m = n; // ref20-warning {{is a C++23 extension}} \ + // expected20-warning {{is a C++23 extension}} + return m; +} +constexpr int j0 = j(0); + +constexpr int k(int n) { + if (!n) + return 0; + thread_local const int m = n; // ref20-warning {{is a C++23 extension}} \ + // expected20-warning {{is a C++23 extension}} + + return m; +} +constexpr int k0 = k(0); From 0841955bf3b79a33091333aba9a3157be72b535c Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Tue, 17 Oct 2023 10:39:59 +0200 Subject: [PATCH 312/720] [TableGen] Use buildConstant to emit apply pattern immediates (#66077) Use `MachineIRBuilder::buildConstant` to emit typed immediates in 'apply' MIR patterns. This adds flexibility, e.g. it allows us to seamlessly handle vector cases, where a `G_BUILD_VECTOR` is needed to create a splat. 
--- llvm/docs/GlobalISel/MIRPatterns.rst | 4 +- .../CodeGen/GlobalISel/GIMatchTableExecutor.h | 5 ++ .../GlobalISel/GIMatchTableExecutorImpl.h | 10 +++ .../match-table-imms.td | 12 ++-- .../match-table-patfrag-root.td | 36 ++++------ .../match-table-permutations.td | 68 +++++++------------ .../GlobalISelCombinerEmitter/match-table.td | 12 ++-- .../TableGen/GlobalISelCombinerEmitter.cpp | 11 ++- llvm/utils/TableGen/GlobalISelMatchTable.cpp | 10 +++ llvm/utils/TableGen/GlobalISelMatchTable.h | 19 ++++++ 10 files changed, 101 insertions(+), 86 deletions(-) diff --git a/llvm/docs/GlobalISel/MIRPatterns.rst b/llvm/docs/GlobalISel/MIRPatterns.rst index 51d1850a12360..fa70311f48572 100644 --- a/llvm/docs/GlobalISel/MIRPatterns.rst +++ b/llvm/docs/GlobalISel/MIRPatterns.rst @@ -257,8 +257,8 @@ Common Pattern #3: Emitting a Constant Value When an immediate operand appears in an 'apply' pattern, the behavior depends on whether it's typed or not. -* If the immediate is typed, a ``G_CONSTANT`` is implicitly emitted - (= a register operand is added to the instruction). +* If the immediate is typed, ``MachineIRBuilder::buildConstant`` is used + to create a ``G_CONSTANT``. A ``G_BUILD_VECTOR`` will be used for vectors. * If the immediate is untyped, a simple immediate is added (``MachineInstrBuilder::addImm``). diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h index 45da6d96aa3de..209f80c6d6d28 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h @@ -292,6 +292,11 @@ enum { /// - Opcode - The new opcode to use GIR_BuildMI, + /// Builds a constant and stores its result in a TempReg. + /// - TempRegID - Temp Register to define. 
+ /// - Imm - The immediate to add + GIR_BuildConstant, + /// Copy an operand to the specified instruction /// - NewInsnID - Instruction ID to modify /// - OldInsnID - Instruction ID to copy from diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h index 6f0f9a6a46c7c..fb03d5ec0bc89 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h @@ -933,6 +933,16 @@ bool GIMatchTableExecutor::executeMatchTable( break; } + case GIR_BuildConstant: { + int64_t TempRegID = MatchTable[CurrentIdx++]; + int64_t Imm = MatchTable[CurrentIdx++]; + Builder.buildConstant(State.TempRegisters[TempRegID], Imm); + DEBUG_WITH_TYPE(TgtExecutor::getName(), + dbgs() << CurrentIdx << ": GIR_BuildConstant(TempReg[" + << TempRegID << "], Imm=" << Imm << ")\n"); + break; + } + case GIR_Copy: { int64_t NewInsnID = MatchTable[CurrentIdx++]; int64_t OldInsnID = MatchTable[CurrentIdx++]; diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td index efe1b4b50dfda..0495a66a7c577 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td @@ -34,7 +34,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const int64_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static int64_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/19, 126, /*)*//*default:*//*Label 3*/ 202, +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/19, 126, /*)*//*default:*//*Label 3*/ 194, // CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // CHECK-NEXT: /*TargetOpcode::G_CONSTANT*//*Label 1*/ 138, 0, 0, 0, 0, 0, // CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 2*/ 165, @@ -69,25 +69,23 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // Label 5: @164 // CHECK-NEXT: GIM_Reject, // CHECK-NEXT: // Label 2: @165 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ 201, // Rule ID 1 // +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ 193, // Rule ID 1 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule1Enabled, // CHECK-NEXT: // MIs[0] a // CHECK-NEXT: // No operand predicates // CHECK-NEXT: // MIs[0] Operand 1 // CHECK-NEXT: GIM_CheckConstantInt, /*MI*/0, /*Op*/1, 0, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #1: InstTest1 // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // a // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 6: @201 +// CHECK-NEXT: // Label 6: @193 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 3: @202 +// CHECK-NEXT: // Label 3: @194 // CHECK-NEXT: GIM_Reject, // CHECK-NEXT: }; // CHECK-NEXT: return MatchTable0; diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td index b6296cf9024da..5cb9206ca5f2c 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td 
+++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td @@ -28,31 +28,29 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const int64_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static int64_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/118, 181, /*)*//*default:*//*Label 3*/ 176, +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/118, 181, /*)*//*default:*//*Label 3*/ 152, // CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 0*/ 68, 0, 0, 0, 0, 0, 0, -// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 1*/ 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -// CHECK-NEXT: /*TargetOpcode::G_FPEXT*//*Label 2*/ 143, +// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 1*/ 93, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// CHECK-NEXT: /*TargetOpcode::G_FPEXT*//*Label 2*/ 127, // CHECK-NEXT: // Label 0: @68 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ 100, // Rule ID 1 // +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ 92, // Rule ID 1 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // MIs[0] root // CHECK-NEXT: // No operand predicates // CHECK-NEXT: // MIs[0] __Test0_match_0.z // CHECK-NEXT: // No operand predicates // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [__Test0_match_0[1]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: 
GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // root // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 4: @100 +// CHECK-NEXT: // Label 4: @92 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 1: @101 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ 142, // Rule ID 0 // +// CHECK-NEXT: // Label 1: @93 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ 126, // Rule ID 0 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // MIs[0] root // CHECK-NEXT: // No operand predicates @@ -63,37 +61,33 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // No operand predicates // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/1, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [__Test0_match_0[0]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // root // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 5: @142 +// CHECK-NEXT: // Label 5: @126 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 2: @143 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ 175, // Rule ID 2 // +// CHECK-NEXT: // Label 2: @127 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ 151, // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // MIs[0] root // CHECK-NEXT: // No 
operand predicates // CHECK-NEXT: // MIs[0] __Test0_match_0.z // CHECK-NEXT: // No operand predicates // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [__Test0_match_0[2]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // root // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 6: @175 +// CHECK-NEXT: // Label 6: @151 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 3: @176 +// CHECK-NEXT: // Label 3: @152 // CHECK-NEXT: GIM_Reject, // CHECK-NEXT: }; // CHECK-NEXT: return MatchTable0; diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-permutations.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-permutations.td index b0651c971c023..22cd2be819de2 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-permutations.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-permutations.td @@ -159,9 +159,9 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const int64_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static int64_t MatchTable0[] = { -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 0*/ 746, +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 0*/ 682, // CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_AND, -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 1*/ 84, // Rule ID 7 // +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 1*/ 76, // Rule ID 7 // // CHECK-NEXT: GIM_CheckSimplePredicate, 
GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -191,9 +191,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/3, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/4, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [a[1], b[1], c[1]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst @@ -201,8 +199,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_CustomAction, GICXXCustomAction_CombineApplyGICombiner0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 1: @84 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 2*/ 172, // Rule ID 6 // +// CHECK-NEXT: // Label 1: @76 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 2*/ 156, // Rule ID 6 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -236,9 +234,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/4, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/5, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [a[1], 
b[1], c[0]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst @@ -246,8 +242,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_CustomAction, GICXXCustomAction_CombineApplyGICombiner0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 2: @172 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 3*/ 260, // Rule ID 5 // +// CHECK-NEXT: // Label 2: @156 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 3*/ 236, // Rule ID 5 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -281,9 +277,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/4, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/5, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [a[1], b[0], c[1]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst @@ -291,8 +285,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_CustomAction, GICXXCustomAction_CombineApplyGICombiner0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 3: @260 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ 357, // Rule ID 4 // +// CHECK-NEXT: // Label 3: @236 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ 325, // Rule ID 4 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // 
MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -330,9 +324,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/5, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/6, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [a[1], b[0], c[0]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst @@ -340,8 +332,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_CustomAction, GICXXCustomAction_CombineApplyGICombiner0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 4: @357 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ 445, // Rule ID 3 // +// CHECK-NEXT: // Label 4: @325 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ 405, // Rule ID 3 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -375,9 +367,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/4, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/5, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [a[0], b[1], c[1]] // CHECK-NEXT: GIR_BuildMI, 
/*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst @@ -385,8 +375,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_CustomAction, GICXXCustomAction_CombineApplyGICombiner0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 5: @445 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ 542, // Rule ID 2 // +// CHECK-NEXT: // Label 5: @405 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ 494, // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -424,9 +414,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/5, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/6, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [a[0], b[1], c[0]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst @@ -434,8 +422,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_CustomAction, GICXXCustomAction_CombineApplyGICombiner0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 6: @542 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ 639, // Rule ID 1 // +// CHECK-NEXT: // Label 6: @494 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ 583, // Rule ID 1 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand 
predicates @@ -473,9 +461,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/5, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/6, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [a[0], b[0], c[1]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst @@ -483,8 +469,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_CustomAction, GICXXCustomAction_CombineApplyGICombiner0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 7: @639 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ 745, // Rule ID 0 // +// CHECK-NEXT: // Label 7: @583 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ 681, // Rule ID 0 // // CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -526,9 +512,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/6, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/7, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #0: Test0 @ [a[0], b[0], c[0]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // 
CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst @@ -536,9 +520,9 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_CustomAction, GICXXCustomAction_CombineApplyGICombiner0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 8: @745 +// CHECK-NEXT: // Label 8: @681 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 0: @746 +// CHECK-NEXT: // Label 0: @682 // CHECK-NEXT: GIM_Reject, // CHECK-NEXT: }; // CHECK-NEXT: return MatchTable0; diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td index f51a18c4d3e73..a74f7fbbe1cce 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td @@ -132,7 +132,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // Verify match table. // CHECK: const int64_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static int64_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/19, 126, /*)*//*default:*//*Label 6*/ 275, +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/19, 126, /*)*//*default:*//*Label 6*/ 267, // CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ 181, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -232,7 +232,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // Label 13: @238 // CHECK-NEXT: GIM_Reject, // CHECK-NEXT: // Label 5: @239 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ 274, // Rule ID 7 // +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ 266, // Rule ID 7 // // 
CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule6Enabled, // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -240,18 +240,16 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // No operand predicates // CHECK-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GICXXPred_MI_Predicate_GICombiner1, // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32, -// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_CONSTANT, -// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, -// CHECK-NEXT: GIR_AddCImm, /*InsnID*/1, /*Type*/GILLT_s32, /*Imm*/0, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/0, /*Val*/0, // CHECK-NEXT: // Combiner Rule #6: PatFragTest0 @ [__PatFragTest0_match_1[0]] // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 14: @274 +// CHECK-NEXT: // Label 14: @266 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 6: @275 +// CHECK-NEXT: // Label 6: @267 // CHECK-NEXT: GIM_Reject, // CHECK-NEXT: }; // CHECK-NEXT: return MatchTable0; diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp index 809415aeff153..f6251cb671885 100644 --- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp @@ -3108,13 +3108,10 @@ bool CombineRuleBuilder::emitCodeGenInstructionApplyImmOperand( } unsigned TempRegID = M.allocateTempRegID(); - auto ActIt = M.insertAction( - M.actions_begin(), M.allocateOutputInsnID(), &getGConstant()); - // Ensure MakeTempReg occurs before the BuildMI of th G_CONSTANT. 
- M.insertAction(ActIt, LLT, TempRegID); - auto &ConstantMI = *static_cast(ActIt->get()); - ConstantMI.addRenderer(TempRegID); - ConstantMI.addRenderer(O.getImmValue(), LLT); + // Ensure MakeTempReg & the BuildConstantAction occur at the beginning. + auto InsertIt = + M.insertAction(M.actions_begin(), LLT, TempRegID); + M.insertAction(++InsertIt, TempRegID, O.getImmValue()); DstMI.addRenderer(TempRegID); return true; } diff --git a/llvm/utils/TableGen/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/GlobalISelMatchTable.cpp index dcfd0a34beb07..9a4a375f34bdb 100644 --- a/llvm/utils/TableGen/GlobalISelMatchTable.cpp +++ b/llvm/utils/TableGen/GlobalISelMatchTable.cpp @@ -2014,6 +2014,16 @@ void BuildMIAction::emitActionOpcodes(MatchTable &Table, EraseInstAction::emitActionOpcodes(Table, Rule, /*InsnID*/ 0); } +//===- BuildConstantAction ------------------------------------------------===// + +void BuildConstantAction::emitActionOpcodes(MatchTable &Table, + RuleMatcher &Rule) const { + Table << MatchTable::Opcode("GIR_BuildConstant") + << MatchTable::Comment("TempRegID") << MatchTable::IntValue(TempRegID) + << MatchTable::Comment("Val") << MatchTable::IntValue(Val) + << MatchTable::LineBreak; +} + //===- EraseInstAction ----------------------------------------------------===// void EraseInstAction::emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule, diff --git a/llvm/utils/TableGen/GlobalISelMatchTable.h b/llvm/utils/TableGen/GlobalISelMatchTable.h index 549d7ccde18bd..5608bab482bfd 100644 --- a/llvm/utils/TableGen/GlobalISelMatchTable.h +++ b/llvm/utils/TableGen/GlobalISelMatchTable.h @@ -2093,6 +2093,7 @@ class MatchAction { AK_DebugComment, AK_CustomCXX, AK_BuildMI, + AK_BuildConstantMI, AK_EraseInst, AK_ReplaceReg, AK_ConstraintOpsToDef, @@ -2187,6 +2188,24 @@ class BuildMIAction : public MatchAction { void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override; }; +/// Generates code to create a constant that defines a TempReg. 
+/// The instruction created is usually a G_CONSTANT but it could also be a +/// G_BUILD_VECTOR for vector types. +class BuildConstantAction : public MatchAction { + unsigned TempRegID; + int64_t Val; + +public: + BuildConstantAction(unsigned TempRegID, int64_t Val) + : MatchAction(AK_BuildConstantMI), TempRegID(TempRegID), Val(Val) {} + + static bool classof(const MatchAction *A) { + return A->getKind() == AK_BuildConstantMI; + } + + void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override; +}; + class EraseInstAction : public MatchAction { unsigned InsnID; From d2b74d7e4217b03e9f127505fe42410ab096afe6 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Tue, 17 Oct 2023 10:59:32 +0200 Subject: [PATCH 313/720] [TableGen] Handle duplicate rules in combiners (#69296) We would crash when a rule was accidentally added twice to a combiner. This patch adds a warning instead to skip the already-processed rules. --- .../misc/redundant-combine-in-list.td | 30 +++++++++++ .../TableGen/GlobalISelCombinerEmitter.cpp | 50 ++++++++++++------- 2 files changed, 62 insertions(+), 18 deletions(-) create mode 100644 llvm/test/TableGen/GlobalISelCombinerEmitter/misc/redundant-combine-in-list.td diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/misc/redundant-combine-in-list.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/misc/redundant-combine-in-list.td new file mode 100644 index 0000000000000..da38a228f672b --- /dev/null +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/misc/redundant-combine-in-list.td @@ -0,0 +1,30 @@ +// RUN: llvm-tblgen -I %p/../../../../include -gen-global-isel-combiner \ +// RUN: -combiners=Combiner %s 2>&1 | FileCheck %s + +include "llvm/Target/Target.td" +include "llvm/Target/GlobalISel/Combine.td" + +// Check we don't crash if a combine is present twice in the list. 
+ +def MyTargetISA : InstrInfo; +def MyTarget : Target { let InstructionSet = MyTargetISA; } + +def dummy; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: warning: skipping rule 'Foo' because it has already been processed +def Foo : GICombineRule< + (defs root:$root), + (match (G_ZEXT $root, $x)), + (apply (G_TRUNC $root, $x))>; + +def Bar : GICombineRule< + (defs root:$root), + (match (G_TRUNC $root, $x)), + (apply (G_ZEXT $root, $x))>; + +def FooBar : GICombineGroup<[ Foo, Bar ]>; + +def Combiner: GICombiner<"GenMyCombiner", [ + FooBar, + Foo +]>; diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp index f6251cb671885..7992cb4362a17 100644 --- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp @@ -3307,6 +3307,10 @@ class GICombinerEmitter final : public GlobalISelMatchTableExecutorEmitter { // combine rule used to disable/enable it. std::vector> AllCombineRules; + // Keep track of all rules we've seen so far to ensure we don't process + // the same rule twice. 
+ StringSet<> RulesSeen; + MatchTable buildMatchTable(MutableArrayRef Rules); void emitRuleConfigImpl(raw_ostream &OS); @@ -3624,27 +3628,37 @@ void GICombinerEmitter::gatherRules( std::vector &ActiveRules, const std::vector &&RulesAndGroups) { for (Record *Rec : RulesAndGroups) { - if (Rec->isValueUnset("Rules")) { - AllCombineRules.emplace_back(NextRuleID, Rec->getName().str()); - CombineRuleBuilder CRB(Target, SubtargetFeatures, *Rec, NextRuleID++, - ActiveRules); + if (!Rec->isValueUnset("Rules")) { + gatherRules(ActiveRules, Rec->getValueAsListOfDefs("Rules")); + continue; + } - if (!CRB.parseAll()) { - assert(ErrorsPrinted && "Parsing failed without errors!"); - continue; - } + StringRef RuleName = Rec->getName(); + if (!RulesSeen.insert(RuleName).second) { + PrintWarning(Rec->getLoc(), + "skipping rule '" + Rec->getName() + + "' because it has already been processed"); + continue; + } - if (StopAfterParse) { - CRB.print(outs()); - continue; - } + AllCombineRules.emplace_back(NextRuleID, Rec->getName().str()); + CombineRuleBuilder CRB(Target, SubtargetFeatures, *Rec, NextRuleID++, + ActiveRules); - if (!CRB.emitRuleMatchers()) { - assert(ErrorsPrinted && "Emission failed without errors!"); - continue; - } - } else - gatherRules(ActiveRules, Rec->getValueAsListOfDefs("Rules")); + if (!CRB.parseAll()) { + assert(ErrorsPrinted && "Parsing failed without errors!"); + continue; + } + + if (StopAfterParse) { + CRB.print(outs()); + continue; + } + + if (!CRB.emitRuleMatchers()) { + assert(ErrorsPrinted && "Emission failed without errors!"); + continue; + } } } From f0601c7569c6e2001b180136e1b699f577fd5c06 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Tue, 17 Oct 2023 13:04:49 +0400 Subject: [PATCH 314/720] [clang][NFC] Replace TypeAlignment with alignof(T) (#69185) This patch replaces usages of `TypeAlignment` with `alignof(T)`, where `T` is type that will be created in allocated storage with placement-new. 
This is now possible, because `alignof` reports the correct alignment for `Type` and classes derived from it after #68377 was merged. While preparing #68377 I verified via `static_assert` that there are no mismatches of alignment between `TypeAlignment` and alignment of types derived from `Type`, so no changes are expected to codegen. --- clang/lib/AST/ASTContext.cpp | 214 +++++++++++++++++++---------------- clang/lib/Sema/SemaType.cpp | 4 +- 2 files changed, 117 insertions(+), 101 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 4c4bcbf8a68f7..27a675b832117 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1234,7 +1234,7 @@ TypedefDecl *ASTContext::getUInt128Decl() const { } void ASTContext::InitBuiltinType(CanQualType &R, BuiltinType::Kind K) { - auto *Ty = new (*this, TypeAlignment) BuiltinType(K); + auto *Ty = new (*this, alignof(BuiltinType)) BuiltinType(K); R = CanQualType::CreateUnsafe(QualType(Ty, 0)); Types.push_back(Ty); } @@ -3066,7 +3066,7 @@ ASTContext::getExtQualType(const Type *baseType, Qualifiers quals) const { (void) ExtQualNodes.FindNodeOrInsertPos(ID, insertPos); } - auto *eq = new (*this, TypeAlignment) ExtQuals(baseType, canon, quals); + auto *eq = new (*this, alignof(ExtQuals)) ExtQuals(baseType, canon, quals); ExtQualNodes.InsertNode(eq, insertPos); return QualType(eq, fastQuals); } @@ -3310,7 +3310,7 @@ QualType ASTContext::getComplexType(QualType T) const { ComplexType *NewIP = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos); assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) ComplexType(T, Canonical); + auto *New = new (*this, alignof(ComplexType)) ComplexType(T, Canonical); Types.push_back(New); ComplexTypes.InsertNode(New, InsertPos); return QualType(New, 0); @@ -3338,7 +3338,7 @@ QualType ASTContext::getPointerType(QualType T) const { PointerType *NewIP = PointerTypes.FindNodeOrInsertPos(ID, InsertPos); 
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) PointerType(T, Canonical); + auto *New = new (*this, alignof(PointerType)) PointerType(T, Canonical); Types.push_back(New); PointerTypes.InsertNode(New, InsertPos); return QualType(New, 0); @@ -3358,7 +3358,7 @@ QualType ASTContext::getAdjustedType(QualType Orig, QualType New) const { AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos); assert(!AT && "Shouldn't be in the map!"); - AT = new (*this, TypeAlignment) + AT = new (*this, alignof(AdjustedType)) AdjustedType(Type::Adjusted, Orig, New, Canonical); Types.push_back(AT); AdjustedTypes.InsertNode(AT, InsertPos); @@ -3379,7 +3379,7 @@ QualType ASTContext::getDecayedType(QualType Orig, QualType Decayed) const { AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos); assert(!AT && "Shouldn't be in the map!"); - AT = new (*this, TypeAlignment) DecayedType(Orig, Decayed, Canonical); + AT = new (*this, alignof(DecayedType)) DecayedType(Orig, Decayed, Canonical); Types.push_back(AT); AdjustedTypes.InsertNode(AT, InsertPos); return QualType(AT, 0); @@ -3433,7 +3433,8 @@ QualType ASTContext::getBlockPointerType(QualType T) const { BlockPointerTypes.FindNodeOrInsertPos(ID, InsertPos); assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) BlockPointerType(T, Canonical); + auto *New = + new (*this, alignof(BlockPointerType)) BlockPointerType(T, Canonical); Types.push_back(New); BlockPointerTypes.InsertNode(New, InsertPos); return QualType(New, 0); @@ -3472,8 +3473,8 @@ ASTContext::getLValueReferenceType(QualType T, bool SpelledAsLValue) const { assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) LValueReferenceType(T, Canonical, - SpelledAsLValue); + auto *New = new (*this, alignof(LValueReferenceType)) + LValueReferenceType(T, Canonical, SpelledAsLValue); Types.push_back(New); LValueReferenceTypes.InsertNode(New, InsertPos); 
@@ -3512,7 +3513,8 @@ QualType ASTContext::getRValueReferenceType(QualType T) const { assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) RValueReferenceType(T, Canonical); + auto *New = new (*this, alignof(RValueReferenceType)) + RValueReferenceType(T, Canonical); Types.push_back(New); RValueReferenceTypes.InsertNode(New, InsertPos); return QualType(New, 0); @@ -3542,7 +3544,8 @@ QualType ASTContext::getMemberPointerType(QualType T, const Type *Cls) const { MemberPointerTypes.FindNodeOrInsertPos(ID, InsertPos); assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) MemberPointerType(T, Cls, Canonical); + auto *New = new (*this, alignof(MemberPointerType)) + MemberPointerType(T, Cls, Canonical); Types.push_back(New); MemberPointerTypes.InsertNode(New, InsertPos); return QualType(New, 0); @@ -3596,7 +3599,7 @@ QualType ASTContext::getConstantArrayType(QualType EltTy, void *Mem = Allocate( ConstantArrayType::totalSizeToAlloc(SizeExpr ? 1 : 0), - TypeAlignment); + alignof(ConstantArrayType)); auto *New = new (Mem) ConstantArrayType(EltTy, Canon, ArySize, SizeExpr, ASM, IndexTypeQuals); ConstantArrayTypes.InsertNode(New, InsertPos); @@ -3765,8 +3768,8 @@ QualType ASTContext::getVariableArrayType(QualType EltTy, Canon = getQualifiedType(Canon, canonSplit.Quals); } - auto *New = new (*this, TypeAlignment) - VariableArrayType(EltTy, Canon, NumElts, ASM, IndexTypeQuals, Brackets); + auto *New = new (*this, alignof(VariableArrayType)) + VariableArrayType(EltTy, Canon, NumElts, ASM, IndexTypeQuals, Brackets); VariableArrayTypes.push_back(New); Types.push_back(New); @@ -3790,8 +3793,9 @@ QualType ASTContext::getDependentSizedArrayType(QualType elementType, // initializer. We do no canonicalization here at all, which is okay // because they can't be used in most locations. 
if (!numElements) { - auto *newType = new (*this, TypeAlignment) DependentSizedArrayType( - elementType, QualType(), numElements, ASM, elementTypeQuals, brackets); + auto *newType = new (*this, alignof(DependentSizedArrayType)) + DependentSizedArrayType(elementType, QualType(), numElements, ASM, + elementTypeQuals, brackets); Types.push_back(newType); return QualType(newType, 0); } @@ -3813,7 +3817,7 @@ QualType ASTContext::getDependentSizedArrayType(QualType elementType, // If we don't have one, build one. if (!canonTy) { - canonTy = new (*this, TypeAlignment) + canonTy = new (*this, alignof(DependentSizedArrayType)) DependentSizedArrayType(QualType(canonElementType.Ty, 0), QualType(), numElements, ASM, elementTypeQuals, brackets); DependentSizedArrayTypes.InsertNode(canonTy, insertPos); @@ -3832,8 +3836,9 @@ QualType ASTContext::getDependentSizedArrayType(QualType elementType, // Otherwise, we need to build a type which follows the spelling // of the element type. - auto *sugaredType = new (*this, TypeAlignment) DependentSizedArrayType( - elementType, canon, numElements, ASM, elementTypeQuals, brackets); + auto *sugaredType = new (*this, alignof(DependentSizedArrayType)) + DependentSizedArrayType(elementType, canon, numElements, ASM, + elementTypeQuals, brackets); Types.push_back(sugaredType); return QualType(sugaredType, 0); } @@ -3867,8 +3872,8 @@ QualType ASTContext::getIncompleteArrayType(QualType elementType, assert(!existing && "Shouldn't be in the map!"); (void) existing; } - auto *newType = new (*this, TypeAlignment) - IncompleteArrayType(elementType, canon, ASM, elementTypeQuals); + auto *newType = new (*this, alignof(IncompleteArrayType)) + IncompleteArrayType(elementType, canon, ASM, elementTypeQuals); IncompleteArrayTypes.InsertNode(newType, insertPos); Types.push_back(newType); @@ -4088,8 +4093,8 @@ QualType ASTContext::getVectorType(QualType vecType, unsigned NumElts, VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos); assert(!NewIP 
&& "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) - VectorType(vecType, NumElts, Canonical, VecKind); + auto *New = new (*this, alignof(VectorType)) + VectorType(vecType, NumElts, Canonical, VecKind); VectorTypes.InsertNode(New, InsertPos); Types.push_back(New); return QualType(New, 0); @@ -4108,12 +4113,12 @@ ASTContext::getDependentVectorType(QualType VecType, Expr *SizeExpr, DependentVectorType *New; if (Canon) { - New = new (*this, TypeAlignment) DependentVectorType( + New = new (*this, alignof(DependentVectorType)) DependentVectorType( VecType, QualType(Canon, 0), SizeExpr, AttrLoc, VecKind); } else { QualType CanonVecTy = getCanonicalType(VecType); if (CanonVecTy == VecType) { - New = new (*this, TypeAlignment) + New = new (*this, alignof(DependentVectorType)) DependentVectorType(VecType, QualType(), SizeExpr, AttrLoc, VecKind); DependentVectorType *CanonCheck = @@ -4125,7 +4130,7 @@ ASTContext::getDependentVectorType(QualType VecType, Expr *SizeExpr, } else { QualType CanonTy = getDependentVectorType(CanonVecTy, SizeExpr, SourceLocation(), VecKind); - New = new (*this, TypeAlignment) + New = new (*this, alignof(DependentVectorType)) DependentVectorType(VecType, CanonTy, SizeExpr, AttrLoc, VecKind); } } @@ -4162,8 +4167,8 @@ QualType ASTContext::getExtVectorType(QualType vecType, VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos); assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) - ExtVectorType(vecType, NumElts, Canonical); + auto *New = new (*this, alignof(ExtVectorType)) + ExtVectorType(vecType, NumElts, Canonical); VectorTypes.InsertNode(New, InsertPos); Types.push_back(New); return QualType(New, 0); @@ -4184,12 +4189,13 @@ ASTContext::getDependentSizedExtVectorType(QualType vecType, if (Canon) { // We already have a canonical version of this array type; use it as // the canonical type for a newly-built type. 
- New = new (*this, TypeAlignment) DependentSizedExtVectorType( - vecType, QualType(Canon, 0), SizeExpr, AttrLoc); + New = new (*this, alignof(DependentSizedExtVectorType)) + DependentSizedExtVectorType(vecType, QualType(Canon, 0), SizeExpr, + AttrLoc); } else { QualType CanonVecTy = getCanonicalType(vecType); if (CanonVecTy == vecType) { - New = new (*this, TypeAlignment) + New = new (*this, alignof(DependentSizedExtVectorType)) DependentSizedExtVectorType(vecType, QualType(), SizeExpr, AttrLoc); DependentSizedExtVectorType *CanonCheck @@ -4200,7 +4206,7 @@ ASTContext::getDependentSizedExtVectorType(QualType vecType, } else { QualType CanonExtTy = getDependentSizedExtVectorType(CanonVecTy, SizeExpr, SourceLocation()); - New = new (*this, TypeAlignment) + New = new (*this, alignof(DependentSizedExtVectorType)) DependentSizedExtVectorType(vecType, CanonExtTy, SizeExpr, AttrLoc); } } @@ -4234,7 +4240,7 @@ QualType ASTContext::getConstantMatrixType(QualType ElementTy, unsigned NumRows, (void)NewIP; } - auto *New = new (*this, TypeAlignment) + auto *New = new (*this, alignof(ConstantMatrixType)) ConstantMatrixType(ElementTy, NumRows, NumColumns, Canonical); MatrixTypes.InsertNode(New, InsertPos); Types.push_back(New); @@ -4255,8 +4261,9 @@ QualType ASTContext::getDependentSizedMatrixType(QualType ElementTy, DependentSizedMatrixTypes.FindNodeOrInsertPos(ID, InsertPos); if (!Canon) { - Canon = new (*this, TypeAlignment) DependentSizedMatrixType( - CanonElementTy, QualType(), RowExpr, ColumnExpr, AttrLoc); + Canon = new (*this, alignof(DependentSizedMatrixType)) + DependentSizedMatrixType(CanonElementTy, QualType(), RowExpr, + ColumnExpr, AttrLoc); #ifndef NDEBUG DependentSizedMatrixType *CanonCheck = DependentSizedMatrixTypes.FindNodeOrInsertPos(ID, InsertPos); @@ -4274,7 +4281,7 @@ QualType ASTContext::getDependentSizedMatrixType(QualType ElementTy, return QualType(Canon, 0); // Use Canon as the canonical type for newly-built type. 
- DependentSizedMatrixType *New = new (*this, TypeAlignment) + DependentSizedMatrixType *New = new (*this, alignof(DependentSizedMatrixType)) DependentSizedMatrixType(ElementTy, QualType(Canon, 0), RowExpr, ColumnExpr, AttrLoc); Types.push_back(New); @@ -4297,8 +4304,9 @@ QualType ASTContext::getDependentAddressSpaceType(QualType PointeeType, DependentAddressSpaceTypes.FindNodeOrInsertPos(ID, insertPos); if (!canonTy) { - canonTy = new (*this, TypeAlignment) DependentAddressSpaceType( - canonPointeeType, QualType(), AddrSpaceExpr, AttrLoc); + canonTy = new (*this, alignof(DependentAddressSpaceType)) + DependentAddressSpaceType(canonPointeeType, QualType(), AddrSpaceExpr, + AttrLoc); DependentAddressSpaceTypes.InsertNode(canonTy, insertPos); Types.push_back(canonTy); } @@ -4307,8 +4315,9 @@ QualType ASTContext::getDependentAddressSpaceType(QualType PointeeType, canonTy->getAddrSpaceExpr() == AddrSpaceExpr) return QualType(canonTy, 0); - auto *sugaredType = new (*this, TypeAlignment) DependentAddressSpaceType( - PointeeType, QualType(canonTy, 0), AddrSpaceExpr, AttrLoc); + auto *sugaredType = new (*this, alignof(DependentAddressSpaceType)) + DependentAddressSpaceType(PointeeType, QualType(canonTy, 0), + AddrSpaceExpr, AttrLoc); Types.push_back(sugaredType); return QualType(sugaredType, 0); } @@ -4352,8 +4361,8 @@ ASTContext::getFunctionNoProtoType(QualType ResultTy, assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) - FunctionNoProtoType(ResultTy, Canonical, Info); + auto *New = new (*this, alignof(FunctionNoProtoType)) + FunctionNoProtoType(ResultTy, Canonical, Info); Types.push_back(New); FunctionNoProtoTypes.InsertNode(New, InsertPos); return QualType(New, 0); @@ -4539,7 +4548,7 @@ QualType ASTContext::getFunctionTypeInternal( EPI.ExtParameterInfos ? NumArgs : 0, EPI.TypeQuals.hasNonFastQualifiers() ? 
1 : 0); - auto *FTP = (FunctionProtoType *)Allocate(Size, TypeAlignment); + auto *FTP = (FunctionProtoType *)Allocate(Size, alignof(FunctionProtoType)); FunctionProtoType::ExtProtoInfo newEPI = EPI; new (FTP) FunctionProtoType(ResultTy, ArgArray, Canonical, newEPI); Types.push_back(FTP); @@ -4567,7 +4576,7 @@ QualType ASTContext::getPipeType(QualType T, bool ReadOnly) const { assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) PipeType(T, Canonical, ReadOnly); + auto *New = new (*this, alignof(PipeType)) PipeType(T, Canonical, ReadOnly); Types.push_back(New); PipeTypes.InsertNode(New, InsertPos); return QualType(New, 0); @@ -4595,7 +4604,7 @@ QualType ASTContext::getBitIntType(bool IsUnsigned, unsigned NumBits) const { if (BitIntType *EIT = BitIntTypes.FindNodeOrInsertPos(ID, InsertPos)) return QualType(EIT, 0); - auto *New = new (*this, TypeAlignment) BitIntType(IsUnsigned, NumBits); + auto *New = new (*this, alignof(BitIntType)) BitIntType(IsUnsigned, NumBits); BitIntTypes.InsertNode(New, InsertPos); Types.push_back(New); return QualType(New, 0); @@ -4612,8 +4621,8 @@ QualType ASTContext::getDependentBitIntType(bool IsUnsigned, DependentBitIntTypes.FindNodeOrInsertPos(ID, InsertPos)) return QualType(Existing, 0); - auto *New = - new (*this, TypeAlignment) DependentBitIntType(IsUnsigned, NumBitsExpr); + auto *New = new (*this, alignof(DependentBitIntType)) + DependentBitIntType(IsUnsigned, NumBitsExpr); DependentBitIntTypes.InsertNode(New, InsertPos); Types.push_back(New); @@ -4645,8 +4654,8 @@ QualType ASTContext::getInjectedClassNameType(CXXRecordDecl *Decl, Decl->TypeForDecl = PrevDecl->TypeForDecl; assert(isa(Decl->TypeForDecl)); } else { - Type *newType = - new (*this, TypeAlignment) InjectedClassNameType(Decl, TST); + Type *newType = new (*this, alignof(InjectedClassNameType)) + InjectedClassNameType(Decl, TST); Decl->TypeForDecl = newType; Types.push_back(newType); } @@ -4687,7 +4696,7 @@ QualType 
ASTContext::getTypedefType(const TypedefNameDecl *Decl, if (!Decl->TypeForDecl) { if (Underlying.isNull()) Underlying = Decl->getUnderlyingType(); - auto *NewType = new (*this, TypeAlignment) TypedefType( + auto *NewType = new (*this, alignof(TypedefType)) TypedefType( Type::Typedef, Decl, QualType(), getCanonicalType(Underlying)); Decl->TypeForDecl = NewType; Types.push_back(NewType); @@ -4707,8 +4716,8 @@ QualType ASTContext::getTypedefType(const TypedefNameDecl *Decl, return QualType(T, 0); } - void *Mem = - Allocate(TypedefType::totalSizeToAlloc(true), TypeAlignment); + void *Mem = Allocate(TypedefType::totalSizeToAlloc(true), + alignof(TypedefType)); auto *NewType = new (Mem) TypedefType(Type::Typedef, Decl, Underlying, getCanonicalType(Underlying)); TypedefTypes.InsertNode(NewType, InsertPos); @@ -4736,7 +4745,7 @@ QualType ASTContext::getUsingType(const UsingShadowDecl *Found, Underlying = QualType(); void *Mem = Allocate(UsingType::totalSizeToAlloc(!Underlying.isNull()), - TypeAlignment); + alignof(UsingType)); UsingType *NewType = new (Mem) UsingType(Found, Underlying, Canon); Types.push_back(NewType); UsingTypes.InsertNode(NewType, InsertPos); @@ -4750,7 +4759,7 @@ QualType ASTContext::getRecordType(const RecordDecl *Decl) const { if (PrevDecl->TypeForDecl) return QualType(Decl->TypeForDecl = PrevDecl->TypeForDecl, 0); - auto *newType = new (*this, TypeAlignment) RecordType(Decl); + auto *newType = new (*this, alignof(RecordType)) RecordType(Decl); Decl->TypeForDecl = newType; Types.push_back(newType); return QualType(newType, 0); @@ -4763,7 +4772,7 @@ QualType ASTContext::getEnumType(const EnumDecl *Decl) const { if (PrevDecl->TypeForDecl) return QualType(Decl->TypeForDecl = PrevDecl->TypeForDecl, 0); - auto *newType = new (*this, TypeAlignment) EnumType(Decl); + auto *newType = new (*this, alignof(EnumType)) EnumType(Decl); Decl->TypeForDecl = newType; Types.push_back(newType); return QualType(newType, 0); @@ -4779,7 +4788,8 @@ QualType 
ASTContext::getUnresolvedUsingType( if (CanonicalDecl->TypeForDecl) return QualType(Decl->TypeForDecl = CanonicalDecl->TypeForDecl, 0); - Type *newType = new (*this, TypeAlignment) UnresolvedUsingType(Decl); + Type *newType = + new (*this, alignof(UnresolvedUsingType)) UnresolvedUsingType(Decl); Decl->TypeForDecl = newType; Types.push_back(newType); return QualType(newType, 0); @@ -4796,7 +4806,7 @@ QualType ASTContext::getAttributedType(attr::Kind attrKind, if (type) return QualType(type, 0); QualType canon = getCanonicalType(equivalentType); - type = new (*this, TypeAlignment) + type = new (*this, alignof(AttributedType)) AttributedType(canon, attrKind, modifiedType, equivalentType); Types.push_back(type); @@ -4817,7 +4827,8 @@ QualType ASTContext::getBTFTagAttributedType(const BTFTypeTagAttr *BTFAttr, return QualType(Ty, 0); QualType Canon = getCanonicalType(Wrapped); - Ty = new (*this, TypeAlignment) BTFTagAttributedType(Canon, Wrapped, BTFAttr); + Ty = new (*this, alignof(BTFTagAttributedType)) + BTFTagAttributedType(Canon, Wrapped, BTFAttr); Types.push_back(Ty); BTFTagAttributedTypes.InsertNode(Ty, InsertPos); @@ -4839,7 +4850,7 @@ QualType ASTContext::getSubstTemplateTypeParmType( if (!SubstParm) { void *Mem = Allocate(SubstTemplateTypeParmType::totalSizeToAlloc( !Replacement.isCanonical()), - TypeAlignment); + alignof(SubstTemplateTypeParmType)); SubstParm = new (Mem) SubstTemplateTypeParmType(Replacement, AssociatedDecl, Index, PackIndex); Types.push_back(SubstParm); @@ -4880,8 +4891,9 @@ ASTContext::getSubstTemplateTypeParmPackType(Decl *AssociatedDecl, } } - auto *SubstParm = new (*this, TypeAlignment) SubstTemplateTypeParmPackType( - Canon, AssociatedDecl, Index, Final, ArgPack); + auto *SubstParm = new (*this, alignof(SubstTemplateTypeParmPackType)) + SubstTemplateTypeParmPackType(Canon, AssociatedDecl, Index, Final, + ArgPack); Types.push_back(SubstParm); SubstTemplateTypeParmPackTypes.InsertNode(SubstParm, InsertPos); return QualType(SubstParm, 0); 
@@ -4904,15 +4916,16 @@ QualType ASTContext::getTemplateTypeParmType(unsigned Depth, unsigned Index, if (TTPDecl) { QualType Canon = getTemplateTypeParmType(Depth, Index, ParameterPack); - TypeParm = new (*this, TypeAlignment) TemplateTypeParmType(TTPDecl, Canon); + TypeParm = new (*this, alignof(TemplateTypeParmType)) + TemplateTypeParmType(TTPDecl, Canon); TemplateTypeParmType *TypeCheck = TemplateTypeParmTypes.FindNodeOrInsertPos(ID, InsertPos); assert(!TypeCheck && "Template type parameter canonical type broken"); (void)TypeCheck; } else - TypeParm = new (*this, TypeAlignment) - TemplateTypeParmType(Depth, Index, ParameterPack); + TypeParm = new (*this, alignof(TemplateTypeParmType)) + TemplateTypeParmType(Depth, Index, ParameterPack); Types.push_back(TypeParm); TemplateTypeParmTypes.InsertNode(TypeParm, InsertPos); @@ -4995,9 +5008,9 @@ ASTContext::getTemplateSpecializationType(TemplateName Template, // try to unique it: these types typically have location information that // we don't unique and don't want to lose. void *Mem = Allocate(sizeof(TemplateSpecializationType) + - sizeof(TemplateArgument) * Args.size() + - (IsTypeAlias? sizeof(QualType) : 0), - TypeAlignment); + sizeof(TemplateArgument) * Args.size() + + (IsTypeAlias ? sizeof(QualType) : 0), + alignof(TemplateSpecializationType)); auto *Spec = new (Mem) TemplateSpecializationType(Template, Args, CanonType, IsTypeAlias ? Underlying : QualType()); @@ -5035,7 +5048,7 @@ QualType ASTContext::getCanonicalTemplateSpecializationType( // Allocate a new canonical template specialization type. 
void *Mem = Allocate((sizeof(TemplateSpecializationType) + sizeof(TemplateArgument) * CanonArgs.size()), - TypeAlignment); + alignof(TemplateSpecializationType)); Spec = new (Mem) TemplateSpecializationType(CanonTemplate, CanonArgs, QualType(), QualType()); @@ -5068,8 +5081,9 @@ QualType ASTContext::getElaboratedType(ElaboratedTypeKeyword Keyword, (void)CheckT; } - void *Mem = Allocate(ElaboratedType::totalSizeToAlloc(!!OwnedTagDecl), - TypeAlignment); + void *Mem = + Allocate(ElaboratedType::totalSizeToAlloc(!!OwnedTagDecl), + alignof(ElaboratedType)); T = new (Mem) ElaboratedType(Keyword, NNS, NamedType, Canon, OwnedTagDecl); Types.push_back(T); @@ -5095,7 +5109,7 @@ ASTContext::getParenType(QualType InnerType) const { (void)CheckT; } - T = new (*this, TypeAlignment) ParenType(InnerType, Canon); + T = new (*this, alignof(ParenType)) ParenType(InnerType, Canon); Types.push_back(T); ParenTypes.InsertNode(T, InsertPos); return QualType(T, 0); @@ -5108,7 +5122,7 @@ ASTContext::getMacroQualifiedType(QualType UnderlyingTy, if (!Canon.isCanonical()) Canon = getCanonicalType(UnderlyingTy); - auto *newType = new (*this, TypeAlignment) + auto *newType = new (*this, alignof(MacroQualifiedType)) MacroQualifiedType(UnderlyingTy, Canon, MacroII); Types.push_back(newType); return QualType(newType, 0); @@ -5133,7 +5147,8 @@ QualType ASTContext::getDependentNameType(ElaboratedTypeKeyword Keyword, if (T) return QualType(T, 0); - T = new (*this, TypeAlignment) DependentNameType(Keyword, NNS, Name, Canon); + T = new (*this, alignof(DependentNameType)) + DependentNameType(Keyword, NNS, Name, Canon); Types.push_back(T); DependentNameTypes.InsertNode(T, InsertPos); return QualType(T, 0); @@ -5191,7 +5206,7 @@ ASTContext::getDependentTemplateSpecializationType( void *Mem = Allocate((sizeof(DependentTemplateSpecializationType) + sizeof(TemplateArgument) * Args.size()), - TypeAlignment); + alignof(DependentTemplateSpecializationType)); T = new (Mem) 
DependentTemplateSpecializationType(Keyword, NNS, Name, Args, Canon); Types.push_back(T); @@ -5271,7 +5286,7 @@ QualType ASTContext::getPackExpansionType(QualType Pattern, PackExpansionTypes.FindNodeOrInsertPos(ID, InsertPos); } - T = new (*this, TypeAlignment) + T = new (*this, alignof(PackExpansionType)) PackExpansionType(Pattern, Canon, NumExpansions); Types.push_back(T); PackExpansionTypes.InsertNode(T, InsertPos); @@ -5387,7 +5402,7 @@ QualType ASTContext::getObjCObjectType( unsigned size = sizeof(ObjCObjectTypeImpl); size += typeArgs.size() * sizeof(QualType); size += protocols.size() * sizeof(ObjCProtocolDecl *); - void *mem = Allocate(size, TypeAlignment); + void *mem = Allocate(size, alignof(ObjCObjectTypeImpl)); auto *T = new (mem) ObjCObjectTypeImpl(canonical, baseType, typeArgs, protocols, isKindOf); @@ -5494,7 +5509,7 @@ ASTContext::getObjCTypeParamType(const ObjCTypeParamDecl *Decl, unsigned size = sizeof(ObjCTypeParamType); size += protocols.size() * sizeof(ObjCProtocolDecl *); - void *mem = Allocate(size, TypeAlignment); + void *mem = Allocate(size, alignof(ObjCTypeParamType)); auto *newType = new (mem) ObjCTypeParamType(Decl, Canonical, protocols); Types.push_back(newType); @@ -5600,7 +5615,8 @@ QualType ASTContext::getObjCObjectPointerType(QualType ObjectT) const { } // No match. 
- void *Mem = Allocate(sizeof(ObjCObjectPointerType), TypeAlignment); + void *Mem = + Allocate(sizeof(ObjCObjectPointerType), alignof(ObjCObjectPointerType)); auto *QType = new (Mem) ObjCObjectPointerType(Canonical, ObjectT); @@ -5626,7 +5642,7 @@ QualType ASTContext::getObjCInterfaceType(const ObjCInterfaceDecl *Decl, if (const ObjCInterfaceDecl *Def = Decl->getDefinition()) Decl = Def; - void *Mem = Allocate(sizeof(ObjCInterfaceType), TypeAlignment); + void *Mem = Allocate(sizeof(ObjCInterfaceType), alignof(ObjCInterfaceType)); auto *T = new (Mem) ObjCInterfaceType(Decl); Decl->TypeForDecl = T; Types.push_back(T); @@ -5651,17 +5667,19 @@ QualType ASTContext::getTypeOfExprType(Expr *tofExpr, TypeOfKind Kind) const { if (Canon) { // We already have a "canonical" version of an identical, dependent // typeof(expr) type. Use that as our canonical type. - toe = new (*this, TypeAlignment) + toe = new (*this, alignof(TypeOfExprType)) TypeOfExprType(tofExpr, Kind, QualType((TypeOfExprType *)Canon, 0)); } else { // Build a new, canonical typeof(expr) type. 
- Canon = new (*this, TypeAlignment) DependentTypeOfExprType(tofExpr, Kind); + Canon = new (*this, alignof(DependentTypeOfExprType)) + DependentTypeOfExprType(tofExpr, Kind); DependentTypeOfExprTypes.InsertNode(Canon, InsertPos); toe = Canon; } } else { QualType Canonical = getCanonicalType(tofExpr->getType()); - toe = new (*this, TypeAlignment) TypeOfExprType(tofExpr, Kind, Canonical); + toe = new (*this, alignof(TypeOfExprType)) + TypeOfExprType(tofExpr, Kind, Canonical); } Types.push_back(toe); return QualType(toe, 0); @@ -5675,7 +5693,7 @@ QualType ASTContext::getTypeOfExprType(Expr *tofExpr, TypeOfKind Kind) const { QualType ASTContext::getTypeOfType(QualType tofType, TypeOfKind Kind) const { QualType Canonical = getCanonicalType(tofType); auto *tot = - new (*this, TypeAlignment) TypeOfType(tofType, Canonical, Kind); + new (*this, alignof(TypeOfType)) TypeOfType(tofType, Canonical, Kind); Types.push_back(tot); return QualType(tot, 0); } @@ -5723,13 +5741,14 @@ QualType ASTContext::getDecltypeType(Expr *e, QualType UnderlyingType) const { = DependentDecltypeTypes.FindNodeOrInsertPos(ID, InsertPos); if (!Canon) { // Build a new, canonical decltype(expr) type. - Canon = new (*this, TypeAlignment) DependentDecltypeType(e, DependentTy); + Canon = new (*this, alignof(DependentDecltypeType)) + DependentDecltypeType(e, DependentTy); DependentDecltypeTypes.InsertNode(Canon, InsertPos); } - dt = new (*this, TypeAlignment) + dt = new (*this, alignof(DecltypeType)) DecltypeType(e, UnderlyingType, QualType((DecltypeType *)Canon, 0)); } else { - dt = new (*this, TypeAlignment) + dt = new (*this, alignof(DecltypeType)) DecltypeType(e, UnderlyingType, getCanonicalType(UnderlyingType)); } Types.push_back(dt); @@ -5755,19 +5774,16 @@ QualType ASTContext::getUnaryTransformType(QualType BaseType, if (!Canon) { // Build a new, canonical __underlying_type(type) type. 
- Canon = new (*this, TypeAlignment) - DependentUnaryTransformType(*this, getCanonicalType(BaseType), - Kind); + Canon = new (*this, alignof(DependentUnaryTransformType)) + DependentUnaryTransformType(*this, getCanonicalType(BaseType), Kind); DependentUnaryTransformTypes.InsertNode(Canon, InsertPos); } - ut = new (*this, TypeAlignment) UnaryTransformType (BaseType, - QualType(), Kind, - QualType(Canon, 0)); + ut = new (*this, alignof(UnaryTransformType)) + UnaryTransformType(BaseType, QualType(), Kind, QualType(Canon, 0)); } else { QualType CanonType = getCanonicalType(UnderlyingType); - ut = new (*this, TypeAlignment) UnaryTransformType (BaseType, - UnderlyingType, Kind, - CanonType); + ut = new (*this, alignof(UnaryTransformType)) + UnaryTransformType(BaseType, UnderlyingType, Kind, CanonType); } Types.push_back(ut); return QualType(ut, 0); @@ -5812,7 +5828,7 @@ QualType ASTContext::getAutoTypeInternal( void *Mem = Allocate(sizeof(AutoType) + sizeof(TemplateArgument) * TypeConstraintArgs.size(), - TypeAlignment); + alignof(AutoType)); auto *AT = new (Mem) AutoType( DeducedType, Keyword, (IsDependent ? 
TypeDependence::DependentInstantiation @@ -5873,7 +5889,7 @@ QualType ASTContext::getDeducedTemplateSpecializationType( DeducedTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos)) return QualType(DTST, 0); - auto *DTST = new (*this, TypeAlignment) + auto *DTST = new (*this, alignof(DeducedTemplateSpecializationType)) DeducedTemplateSpecializationType(Template, DeducedType, IsDependent); llvm::FoldingSetNodeID TempID; DTST->Profile(TempID); @@ -5905,7 +5921,7 @@ QualType ASTContext::getAtomicType(QualType T) const { AtomicType *NewIP = AtomicTypes.FindNodeOrInsertPos(ID, InsertPos); assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this, TypeAlignment) AtomicType(T, Canonical); + auto *New = new (*this, alignof(AtomicType)) AtomicType(T, Canonical); Types.push_back(New); AtomicTypes.InsertNode(New, InsertPos); return QualType(New, 0); @@ -5914,7 +5930,7 @@ QualType ASTContext::getAtomicType(QualType T) const { /// getAutoDeductType - Get type pattern for deducing against 'auto'. QualType ASTContext::getAutoDeductType() const { if (AutoDeductTy.isNull()) - AutoDeductTy = QualType(new (*this, TypeAlignment) + AutoDeductTy = QualType(new (*this, alignof(AutoType)) AutoType(QualType(), AutoTypeKeyword::Auto, TypeDependence::None, QualType(), /*concept*/ nullptr, /*args*/ {}), diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 2182fa6f7550c..b1bdf670f1788 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -6684,8 +6684,8 @@ ParsedType Sema::CreateParsedType(QualType T, TypeSourceInfo *TInfo) { // FIXME: LocInfoTypes are "transient", only needed for passing to/from Parser // and Sema during declaration parsing. Try deallocating/caching them when // it's appropriate, instead of allocating them and keeping them around. 
- LocInfoType *LocT = (LocInfoType*)BumpAlloc.Allocate(sizeof(LocInfoType), - TypeAlignment); + LocInfoType *LocT = (LocInfoType *)BumpAlloc.Allocate(sizeof(LocInfoType), + alignof(LocInfoType)); new (LocT) LocInfoType(T, TInfo); assert(LocT->getTypeClass() != T->getTypeClass() && "LocInfoType's TypeClass conflicts with an existing Type class"); From 815193f6be08e0f5876c9a0cb88c9df104710e68 Mon Sep 17 00:00:00 2001 From: Mikhail Goncharov Date: Tue, 17 Oct 2023 11:24:43 +0200 Subject: [PATCH 315/720] [ci] diff with main merge-base (#69308) Basically a reland of ec9d80ec43f5761a34c4a785c67d9e7d21ec8bda but now with fetching of main before that to get a correct merge base. --- .ci/generate-buildkite-pipeline-premerge | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/.ci/generate-buildkite-pipeline-premerge b/.ci/generate-buildkite-pipeline-premerge index 1028c08e20fcd..9c6f5aefd6de0 100755 --- a/.ci/generate-buildkite-pipeline-premerge +++ b/.ci/generate-buildkite-pipeline-premerge @@ -21,18 +21,11 @@ set -eu set -o pipefail # Environment variables script works with: -# List of files affected by this commit -: ${MODIFIED_FILES:=$(git diff --name-only HEAD~1)} + # Fetch origin/main to have an up to date merge base for main...HEAD diff. 
git fetch origin main:main -echo "files modified HEAD~1" >&2 -git --no-pager diff --name-only HEAD~1 >&2 -echo "files modified main...HEAD" >&2 -git --no-pager diff --name-only main...HEAD | head -n 10 >&2 -merge_base=$(git merge-base main HEAD) -echo "merge base with main $merge_base" >&2 -echo "git log" >&2 -git --no-pager log --oneline --abbrev-commit -n 5 >&2 +# List of files affected by this commit +: ${MODIFIED_FILES:=$(git diff --name-only main...HEAD)} # Filter rules for generic windows tests : ${WINDOWS_AGENTS:='{"queue": "windows"}'} # Filter rules for generic linux tests From 484668c7597d9198e21332b30d2f15ece536a0bb Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Tue, 17 Oct 2023 11:33:45 +0200 Subject: [PATCH 316/720] Reland "[MLIR][LLVM] Change addressof builders to use opaque pointers" (#69292) This relands fbde19a664e5fd7196080fb4ff0aeaa31dce8508, which was broken due to incorrect GEP element type creation. This commit changes the builders of the `llvm.mlir.addressof` operations to no longer produce typed pointers. As a consequence, a GPU to NVVM pattern had to be updated, that still relied on typed pointers. 
--- mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp | 7 ++--- mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp | 7 ++--- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 4 +-- .../Conversion/GPUCommon/GPUOpsLowering.cpp | 18 ++++++------ .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 28 +++++++++---------- 5 files changed, 30 insertions(+), 34 deletions(-) diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp index 684ce37b2398c..f05f1c2dc3388 100644 --- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp @@ -117,8 +117,8 @@ class PrintOpLowering : public ConversionPattern { /// * `i32 (i8*, ...)` static LLVM::LLVMFunctionType getPrintfType(MLIRContext *context) { auto llvmI32Ty = IntegerType::get(context, 32); - auto llvmI8PtrTy = LLVM::LLVMPointerType::get(IntegerType::get(context, 8)); - auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmI8PtrTy, + auto llvmPtrTy = LLVM::LLVMPointerType::get(context); + auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmPtrTy, /*isVarArg=*/true); return llvmFnType; } @@ -162,8 +162,7 @@ class PrintOpLowering : public ConversionPattern { Value cst0 = builder.create(loc, builder.getI64Type(), builder.getIndexAttr(0)); return builder.create( - loc, - LLVM::LLVMPointerType::get(IntegerType::get(builder.getContext(), 8)), + loc, LLVM::LLVMPointerType::get(builder.getContext()), global.getType(), globalPtr, ArrayRef({cst0, cst0})); } }; diff --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp index 684ce37b2398c..f05f1c2dc3388 100644 --- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp @@ -117,8 +117,8 @@ class PrintOpLowering : public ConversionPattern { /// * `i32 (i8*, ...)` static LLVM::LLVMFunctionType getPrintfType(MLIRContext *context) { auto llvmI32Ty = IntegerType::get(context, 32); - auto llvmI8PtrTy = 
LLVM::LLVMPointerType::get(IntegerType::get(context, 8)); - auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmI8PtrTy, + auto llvmPtrTy = LLVM::LLVMPointerType::get(context); + auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmPtrTy, /*isVarArg=*/true); return llvmFnType; } @@ -162,8 +162,7 @@ class PrintOpLowering : public ConversionPattern { Value cst0 = builder.create(loc, builder.getI64Type(), builder.getIndexAttr(0)); return builder.create( - loc, - LLVM::LLVMPointerType::get(IntegerType::get(builder.getContext(), 8)), + loc, LLVM::LLVMPointerType::get(builder.getContext()), global.getType(), globalPtr, ArrayRef({cst0, cst0})); } }; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 8745d14c8d483..2a572ab4de706 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1071,7 +1071,7 @@ def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof", CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - LLVM::LLVMPointerType::get(global.getType(), global.getAddrSpace()), + LLVM::LLVMPointerType::get($_builder.getContext(), global.getAddrSpace()), global.getSymName()); $_state.addAttributes(attrs); }]>, @@ -1079,7 +1079,7 @@ def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof", CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - LLVM::LLVMPointerType::get(func.getFunctionType()), func.getName()); + LLVM::LLVMPointerType::get($_builder.getContext()), func.getName()); $_state.addAttributes(attrs); }]> ]; diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 96d8fceba7066..6d2585aa30ab4 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -441,7 +441,7 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( Location loc = gpuPrintfOp->getLoc(); mlir::Type llvmI8 = 
typeConverter->convertType(rewriter.getIntegerType(8)); - mlir::Type i8Ptr = LLVM::LLVMPointerType::get(llvmI8); + mlir::Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext()); // Note: this is the GPUModule op, not the ModuleOp that surrounds it // This ensures that global constants and declarations are placed within @@ -449,7 +449,7 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( auto moduleOp = gpuPrintfOp->getParentOfType(); auto vprintfType = - LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {i8Ptr, i8Ptr}); + LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType, ptrType}); LLVM::LLVMFuncOp vprintfDecl = getOrDefineFunction(moduleOp, loc, rewriter, "vprintf", vprintfType); @@ -473,7 +473,8 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( // Get a pointer to the format string's first element Value globalPtr = rewriter.create(loc, global); Value stringStart = rewriter.create( - loc, i8Ptr, globalPtr, ArrayRef{0, 0}); + loc, getTypeConverter()->getPointerType(globalType), globalType, + globalPtr, ArrayRef{0, 0}); SmallVector types; SmallVector args; // Promote and pack the arguments into a stack allocation. 
@@ -490,18 +491,17 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( } Type structType = LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types); - Type structPtrType = LLVM::LLVMPointerType::get(structType); Value one = rewriter.create(loc, rewriter.getI64Type(), rewriter.getIndexAttr(1)); - Value tempAlloc = rewriter.create(loc, structPtrType, one, - /*alignment=*/0); + Value tempAlloc = + rewriter.create(loc, ptrType, structType, one, + /*alignment=*/0); for (auto [index, arg] : llvm::enumerate(args)) { Value ptr = rewriter.create( - loc, LLVM::LLVMPointerType::get(arg.getType()), tempAlloc, - ArrayRef{0, index}); + loc, getTypeConverter()->getPointerType(structType), structType, + tempAlloc, ArrayRef{0, index}); rewriter.create(loc, arg, ptr); } - tempAlloc = rewriter.create(loc, i8Ptr, tempAlloc); std::array printfArgs = {stringStart, tempAlloc}; rewriter.create(loc, vprintfDecl, printfArgs); diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index 391ccd74841dc..a8c02e32ef92b 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -542,16 +542,15 @@ gpu.module @test_module_28 { gpu.module @test_module_29 { // CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL0:[A-Za-z0-9_]+]]("Hello, world\0A\00") // CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL1:[A-Za-z0-9_]+]]("Hello: %d\0A\00") - // CHECK-DAG: llvm.func @vprintf(!llvm.ptr, !llvm.ptr) -> i32 + // CHECK-DAG: llvm.func @vprintf(!llvm.ptr, !llvm.ptr) -> i32 // CHECK-LABEL: func @test_const_printf gpu.func @test_const_printf() { - // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL0]] : !llvm.ptr> - // CHECK-NEXT: %[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr>) -> !llvm.ptr + // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL0]] : !llvm.ptr + // CHECK-NEXT: 
%[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<14 x i8> // CHECK-NEXT: %[[O:.*]] = llvm.mlir.constant(1 : index) : i64 - // CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca %[[O]] x !llvm.struct<()> : (i64) -> !llvm.ptr> - // CHECK-NEXT: %[[ARGPTR:.*]] = llvm.bitcast %[[ALLOC]] : !llvm.ptr> to !llvm.ptr - // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ARGPTR]]) : (!llvm.ptr, !llvm.ptr) -> i32 + // CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca %[[O]] x !llvm.struct<()> : (i64) -> !llvm.ptr + // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32 gpu.printf "Hello, world\n" gpu.return } @@ -559,17 +558,16 @@ gpu.module @test_module_29 { // CHECK-LABEL: func @test_printf // CHECK: (%[[ARG0:.*]]: i32, %[[ARG1:.*]]: f32) gpu.func @test_printf(%arg0: i32, %arg1: f32) { - // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL1]] : !llvm.ptr> - // CHECK-NEXT: %[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr>) -> !llvm.ptr + // CHECK-NEXT: %[[FORMATSTR:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL1]] : !llvm.ptr + // CHECK-NEXT: %[[FORMATSTART:.*]] = llvm.getelementptr %[[FORMATSTR]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<11 x i8> // CHECK-NEXT: %[[EXT:.+]] = llvm.fpext %[[ARG1]] : f32 to f64 // CHECK-NEXT: %[[O:.*]] = llvm.mlir.constant(1 : index) : i64 - // CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca %[[O]] x !llvm.struct<(i32, f64)> : (i64) -> !llvm.ptr> - // CHECK-NEXT: %[[EL0:.*]] = llvm.getelementptr %[[ALLOC]][0, 0] : (!llvm.ptr>) -> !llvm.ptr - // CHECK-NEXT: llvm.store %[[ARG0]], %[[EL0]] : !llvm.ptr - // CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr>) -> !llvm.ptr - // CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : !llvm.ptr - // CHECK-NEXT: %[[ARGPTR:.*]] = llvm.bitcast %[[ALLOC]] : !llvm.ptr> to !llvm.ptr - // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ARGPTR]]) : (!llvm.ptr, !llvm.ptr) -> i32 + 
// CHECK-NEXT: %[[ALLOC:.*]] = llvm.alloca %[[O]] x !llvm.struct<(i32, f64)> : (i64) -> !llvm.ptr + // CHECK-NEXT: %[[EL0:.*]] = llvm.getelementptr %[[ALLOC]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, f64)> + // CHECK-NEXT: llvm.store %[[ARG0]], %[[EL0]] : i32, !llvm.ptr + // CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, f64)> + // CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : f64, !llvm.ptr + // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32 gpu.printf "Hello: %d\n" %arg0, %arg1 : i32, f32 gpu.return } From 838f2890fd30295b771908e234fb06cb169cf355 Mon Sep 17 00:00:00 2001 From: Amirreza Ashouri Date: Tue, 17 Oct 2023 13:08:12 +0330 Subject: [PATCH 317/720] [libc++] Eliminate extra allocations from `std::move(oss).str()` (#67294) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add test coverage for the new behaviors, especially to verify that the returned string uses the correct allocator. Fixes https://github.com/llvm/llvm-project/issues/64644 Migrated from https://reviews.llvm.org/D157776 — @philnik777 @pfusik @ldionne @mordante please take another look! 
--- libcxx/include/sstream | 10 +- libcxx/include/string | 21 ++- .../str.allocator_propagation.pass.cpp | 144 ++++++++++++++++++ .../istringstream.members/str.move.pass.cpp | 8 + .../str.allocator_propagation.pass.cpp | 115 ++++++++++++++ .../ostringstream.members/str.move.pass.cpp | 8 + .../stringbuf.members/str.move.pass.cpp | 43 ++++++ .../stringbuf/stringbuf.members/str.pass.cpp | 45 +++++- .../stringbuf/stringbuf.members/view.pass.cpp | 28 ++++ .../str.allocator_propagation.pass.cpp | 144 ++++++++++++++++++ .../stringstream.members/str.move.pass.cpp | 8 + libcxx/test/support/test_allocator.h | 25 +++ 12 files changed, 582 insertions(+), 17 deletions(-) create mode 100644 libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.allocator_propagation.pass.cpp create mode 100644 libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.allocator_propagation.pass.cpp create mode 100644 libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/str.allocator_propagation.pass.cpp diff --git a/libcxx/include/sstream b/libcxx/include/sstream index 7db5409871873..4fec465d57480 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -400,12 +400,12 @@ public: _LIBCPP_HIDE_FROM_ABI_SSTREAM string_type str() const & { return str(__str_.get_allocator()); } _LIBCPP_HIDE_FROM_ABI_SSTREAM string_type str() && { - string_type __result; const basic_string_view<_CharT, _Traits> __view = view(); - if (!__view.empty()) { - auto __pos = __view.data() - __str_.data(); - __result.assign(std::move(__str_), __pos, __view.size()); - } + typename string_type::size_type __pos = __view.empty() ? 0 : __view.data() - __str_.data(); + // In C++23, this is just string_type(std::move(__str_), __pos, __view.size(), __str_.get_allocator()); + // But we need something that works in C++20 also. 
+ string_type __result(__str_.get_allocator()); + __result.__move_assign(std::move(__str_), __pos, __view.size()); __str_.clear(); __init_buf_ptrs(); return __result; diff --git a/libcxx/include/string b/libcxx/include/string index 3078715e02b35..91935162f0238 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -979,12 +979,7 @@ public: auto __len = std::min(__n, __str.size() - __pos); if (__alloc_traits::is_always_equal::value || __alloc == __str.__alloc()) { - __r_.first() = __str.__r_.first(); - __str.__r_.first() = __rep(); - - _Traits::move(data(), data() + __pos, __len); - __set_size(__len); - _Traits::assign(data()[__len], value_type()); + __move_assign(std::move(__str), __pos, __len); } else { // Perform a copy because the allocators are not compatible. __init(__str.data() + __pos, __len); @@ -1329,6 +1324,20 @@ public: return assign(__sv.data(), __sv.size()); } +#if _LIBCPP_STD_VER >= 20 + _LIBCPP_HIDE_FROM_ABI constexpr + void __move_assign(basic_string&& __str, size_type __pos, size_type __len) { + // Pilfer the allocation from __str. 
+ _LIBCPP_ASSERT_INTERNAL(__alloc() == __str.__alloc(), "__move_assign called with wrong allocator"); + __r_.first() = __str.__r_.first(); + __str.__r_.first() = __rep(); + + _Traits::move(data(), data() + __pos, __len); + __set_size(__len); + _Traits::assign(data()[__len], value_type()); + } +#endif + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& assign(const basic_string& __str) { return *this = __str; } #ifndef _LIBCPP_CXX03_LANG diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.allocator_propagation.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.allocator_propagation.pass.cpp new file mode 100644 index 0000000000000..ab41103aa8568 --- /dev/null +++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.allocator_propagation.pass.cpp @@ -0,0 +1,144 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// TODO: Change to XFAIL once https://github.com/llvm/llvm-project/issues/40340 is fixed +// UNSUPPORTED: availability-pmr-missing + +// This test ensures that we properly propagate allocators from istringstream's +// inner string object to the new string returned from .str(). +// `str() const&` is specified to preserve the allocator (not copy the string). +// `str() &&` isn't specified, but should preserve the allocator (move the string). 
+ +#include +#include +#include +#include +#include +#include +#include + +#include "make_string.h" +#include "test_allocator.h" +#include "test_macros.h" + +template +void test_soccc_behavior() { + using Alloc = SocccAllocator; + using SS = std::basic_istringstream, Alloc>; + using S = std::basic_string, Alloc>; + { + SS ss = SS(std::ios_base::in, Alloc(10)); + + // [stringbuf.members]/6 specifies that the allocator is copied, + // not select_on_container_copy_construction'ed. + // + S copied = ss.str(); + assert(copied.get_allocator().count_ == 10); + assert(ss.rdbuf()->get_allocator().count_ == 10); + assert(copied.empty()); + + // sanity-check that SOCCC does in fact work + assert(S(copied).get_allocator().count_ == 11); + + // [stringbuf.members]/10 doesn't specify the allocator to use, + // but copying the allocator as-if-by moving the string makes sense. + // + S moved = std::move(ss).str(); + assert(moved.get_allocator().count_ == 10); + assert(ss.rdbuf()->get_allocator().count_ == 10); + assert(moved.empty()); + } +} + +template , std::pmr::polymorphic_allocator>> +struct StringBuf : Base { + explicit StringBuf(std::pmr::memory_resource* mr) : Base(std::ios_base::in, mr) {} + void public_setg(int a, int b, int c) { + CharT* p = this->eback(); + assert(this->view().data() == p); + this->setg(p + a, p + b, p + c); + assert(this->eback() == p + a); + assert(this->view().data() == p + a); + } +}; + +template +void test_allocation_is_pilfered() { + using SS = std::basic_istringstream, std::pmr::polymorphic_allocator>; + using S = std::pmr::basic_string; + alignas(void*) char buf[80 * sizeof(CharT)]; + const CharT* initial = + MAKE_CSTRING(CharT, "a very long string that exceeds the small string optimization buffer length"); + { + std::pmr::set_default_resource(std::pmr::null_memory_resource()); + auto mr1 = std::pmr::monotonic_buffer_resource(buf, sizeof(buf), std::pmr::null_memory_resource()); + SS ss = SS(S(initial, &mr1)); + S s = std::move(ss).str(); + 
assert(s == initial); + } + { + // Try moving-out-of a stringbuf whose view() is not the entire string. + // This is libc++'s behavior; libstdc++ doesn't allow such stringbufs to be created. + // + std::pmr::set_default_resource(std::pmr::null_memory_resource()); + auto mr1 = std::pmr::monotonic_buffer_resource(buf, sizeof(buf), std::pmr::null_memory_resource()); + auto src = StringBuf(&mr1); + src.str(S(initial, &mr1)); + src.public_setg(2, 6, 40); + SS ss(std::ios_base::in, &mr1); + *ss.rdbuf() = std::move(src); + LIBCPP_ASSERT(ss.view() == std::basic_string_view(initial).substr(2, 38)); + S s = std::move(ss).str(); + LIBCPP_ASSERT(s == std::basic_string_view(initial).substr(2, 38)); + } +} + +template +void test_no_foreign_allocations() { + using SS = std::basic_istringstream, std::pmr::polymorphic_allocator>; + using S = std::pmr::basic_string; + const CharT* initial = + MAKE_CSTRING(CharT, "a very long string that exceeds the small string optimization buffer length"); + { + std::pmr::set_default_resource(std::pmr::null_memory_resource()); + auto mr1 = std::pmr::monotonic_buffer_resource(std::pmr::new_delete_resource()); + auto ss = SS(S(initial, &mr1)); + assert(ss.rdbuf()->get_allocator().resource() == &mr1); + + // [stringbuf.members]/6 specifies that the result of `str() const &` + // does NOT use the default allocator; it uses the original allocator. + // + S copied = ss.str(); + assert(copied.get_allocator().resource() == &mr1); + assert(ss.rdbuf()->get_allocator().resource() == &mr1); + assert(copied == initial); + + // [stringbuf.members]/10 doesn't specify the allocator to use, + // but copying the allocator as-if-by moving the string makes sense. 
+ // + S moved = std::move(ss).str(); + assert(moved.get_allocator().resource() == &mr1); + assert(ss.rdbuf()->get_allocator().resource() == &mr1); + assert(moved == initial); + } +} + +int main(int, char**) { + test_soccc_behavior(); + test_allocation_is_pilfered(); + test_no_foreign_allocations(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test_soccc_behavior(); + test_allocation_is_pilfered(); + test_no_foreign_allocations(); +#endif + + return 0; +} diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.move.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.move.pass.cpp index 546f82166aaef..0bd076af5e9cd 100644 --- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.move.pass.cpp @@ -37,6 +37,14 @@ static void test() { assert(s.empty()); assert(ss.view().empty()); } + { + std::basic_istringstream ss( + STR("a very long string that exceeds the small string optimization buffer length")); + const CharT* p = ss.view().data(); + std::basic_string s = std::move(ss).str(); + assert(s.data() == p); // the allocation was pilfered + assert(ss.view().empty()); + } } int main(int, char**) { diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.allocator_propagation.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.allocator_propagation.pass.cpp new file mode 100644 index 0000000000000..a5ee2afab11e9 --- /dev/null +++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.allocator_propagation.pass.cpp @@ -0,0 +1,115 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// TODO: Change to XFAIL once https://github.com/llvm/llvm-project/issues/40340 is fixed +// UNSUPPORTED: availability-pmr-missing + +// This test ensures that we properly propagate allocators from ostringstream's +// inner string object to the new string returned from .str(). +// `str() const&` is specified to preserve the allocator (not copy the string). +// `str() &&` isn't specified, but should preserve the allocator (move the string). + +#include +#include +#include +#include +#include +#include + +#include "make_string.h" +#include "test_allocator.h" +#include "test_macros.h" + +template +void test_soccc_behavior() { + using Alloc = SocccAllocator; + using SS = std::basic_ostringstream, Alloc>; + using S = std::basic_string, Alloc>; + { + SS ss = SS(std::ios_base::out, Alloc(10)); + + // [stringbuf.members]/6 specifies that the allocator is copied, + // not select_on_container_copy_construction'ed. + // + S copied = ss.str(); + assert(copied.get_allocator().count_ == 10); + assert(ss.rdbuf()->get_allocator().count_ == 10); + assert(copied.empty()); + + // sanity-check that SOCCC does in fact work + assert(S(copied).get_allocator().count_ == 11); + + // [stringbuf.members]/10 doesn't specify the allocator to use, + // but copying the allocator as-if-by moving the string makes sense. 
+ // + S moved = std::move(ss).str(); + assert(moved.get_allocator().count_ == 10); + assert(ss.rdbuf()->get_allocator().count_ == 10); + assert(moved.empty()); + } +} + +template +void test_allocation_is_pilfered() { + using SS = std::basic_ostringstream, std::pmr::polymorphic_allocator>; + using S = std::pmr::basic_string; + alignas(void*) char buf[80 * sizeof(CharT)]; + const CharT* initial = + MAKE_CSTRING(CharT, "a very long string that exceeds the small string optimization buffer length"); + { + std::pmr::set_default_resource(std::pmr::null_memory_resource()); + auto mr1 = std::pmr::monotonic_buffer_resource(buf, sizeof(buf), std::pmr::null_memory_resource()); + SS ss = SS(S(initial, &mr1)); + S s = std::move(ss).str(); + assert(s == initial); + } +} + +template +void test_no_foreign_allocations() { + using SS = std::basic_ostringstream, std::pmr::polymorphic_allocator>; + using S = std::pmr::basic_string; + const CharT* initial = + MAKE_CSTRING(CharT, "a very long string that exceeds the small string optimization buffer length"); + { + std::pmr::set_default_resource(std::pmr::null_memory_resource()); + auto mr1 = std::pmr::monotonic_buffer_resource(std::pmr::new_delete_resource()); + auto ss = SS(S(initial, &mr1)); + assert(ss.rdbuf()->get_allocator().resource() == &mr1); + + // [stringbuf.members]/6 specifies that the result of `str() const &` + // does NOT use the default allocator; it uses the original allocator. + // + S copied = ss.str(); + assert(copied.get_allocator().resource() == &mr1); + assert(ss.rdbuf()->get_allocator().resource() == &mr1); + assert(copied == initial); + + // [stringbuf.members]/10 doesn't specify the allocator to use, + // but copying the allocator as-if-by moving the string makes sense. 
+ // + S moved = std::move(ss).str(); + assert(moved.get_allocator().resource() == &mr1); + assert(ss.rdbuf()->get_allocator().resource() == &mr1); + assert(moved == initial); + } +} + +int main(int, char**) { + test_soccc_behavior(); + test_allocation_is_pilfered(); + test_no_foreign_allocations(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test_soccc_behavior(); + test_allocation_is_pilfered(); + test_no_foreign_allocations(); +#endif + + return 0; +} diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.move.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.move.pass.cpp index 57f2384bae52c..0e1c06f191933 100644 --- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.move.pass.cpp @@ -37,6 +37,14 @@ static void test() { assert(s.empty()); assert(ss.view().empty()); } + { + std::basic_ostringstream ss( + STR("a very long string that exceeds the small string optimization buffer length")); + const CharT* p = ss.view().data(); + std::basic_string s = std::move(ss).str(); + assert(s.data() == p); // the allocation was pilfered + assert(ss.view().empty()); + } } int main(int, char**) { diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.move.pass.cpp index 0f0f540a9c247..9d75bf938ad75 100644 --- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.move.pass.cpp @@ -37,6 +37,48 @@ static void test() { assert(s.empty()); assert(buf.view().empty()); } + { + std::basic_stringbuf buf(STR("a very long string that exceeds the small string optimization buffer length")); + const CharT* p = buf.view().data(); + 
std::basic_string s = std::move(buf).str(); + assert(s.data() == p); // the allocation was pilfered + assert(buf.view().empty()); + } +} + +struct StringBuf : std::stringbuf { + using basic_stringbuf::basic_stringbuf; + void public_setg(int a, int b, int c) { + char* p = eback(); + this->setg(p + a, p + b, p + c); + } +}; + +static void test_altered_sequence_pointers() { + { + auto src = StringBuf("hello world", std::ios_base::in); + src.public_setg(4, 6, 9); + std::stringbuf dest; + dest = std::move(src); + std::string view = std::string(dest.view()); + std::string str = std::move(dest).str(); + assert(view == str); + LIBCPP_ASSERT(str == "o wor"); + assert(dest.str().empty()); + assert(dest.view().empty()); + } + { + auto src = StringBuf("hello world", std::ios_base::in); + src.public_setg(4, 6, 9); + std::stringbuf dest; + dest.swap(src); + std::string view = std::string(dest.view()); + std::string str = std::move(dest).str(); + assert(view == str); + LIBCPP_ASSERT(str == "o wor"); + assert(dest.str().empty()); + assert(dest.view().empty()); + } } int main(int, char**) { @@ -44,5 +86,6 @@ int main(int, char**) { #ifndef TEST_HAS_NO_WIDE_CHARACTERS test(); #endif + test_altered_sequence_pointers(); return 0; } diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.pass.cpp index 18a2337f6b783..8cd3840b6841f 100644 --- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.pass.cpp @@ -14,18 +14,51 @@ // void str(const basic_string& s); #include +#include #include #include "test_macros.h" +struct StringBuf : std::stringbuf { + explicit StringBuf(const char* s, std::ios_base::openmode mode) : basic_stringbuf(s, mode) {} + void public_setg(int a, int b, int c) { + char* p = eback(); + this->setg(p + a, p + b, p + c); + } +}; + +static 
void test_altered_sequence_pointers() { + { + StringBuf src("hello world", std::ios_base::in); + src.public_setg(4, 6, 9); + std::stringbuf dest; + dest = std::move(src); + std::string str = dest.str(); + assert(5 <= str.size() && str.size() <= 11); + LIBCPP_ASSERT(str == "o wor"); + LIBCPP_ASSERT(dest.str() == "o wor"); + } + { + StringBuf src("hello world", std::ios_base::in); + src.public_setg(4, 6, 9); + std::stringbuf dest; + dest.swap(src); + std::string str = dest.str(); + assert(5 <= str.size() && str.size() <= 11); + LIBCPP_ASSERT(str == "o wor"); + LIBCPP_ASSERT(dest.str() == "o wor"); + } +} + int main(int, char**) { - { - std::stringbuf buf("testing"); - assert(buf.str() == "testing"); - buf.str("another test"); - assert(buf.str() == "another test"); - } + test_altered_sequence_pointers(); + { + std::stringbuf buf("testing"); + assert(buf.str() == "testing"); + buf.str("another test"); + assert(buf.str() == "another test"); + } #ifndef TEST_HAS_NO_WIDE_CHARACTERS { std::wstringbuf buf(L"testing"); diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/view.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/view.pass.cpp index 4aa2e4ab23510..67ff506bb9dc4 100644 --- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/view.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/view.pass.cpp @@ -50,10 +50,38 @@ static void test() { static_assert(std::is_same_v>>); } +struct StringBuf : std::stringbuf { + using basic_stringbuf::basic_stringbuf; + void public_setg(int a, int b, int c) { + char* p = eback(); + this->setg(p + a, p + b, p + c); + } +}; + +static void test_altered_sequence_pointers() { + { + auto src = StringBuf("hello world", std::ios_base::in); + src.public_setg(4, 6, 9); + std::stringbuf dest; + dest = std::move(src); + assert(dest.view() == dest.str()); + LIBCPP_ASSERT(dest.view() == "o wor"); + } + { + auto src = 
StringBuf("hello world", std::ios_base::in); + src.public_setg(4, 6, 9); + std::stringbuf dest; + dest.swap(src); + assert(dest.view() == dest.str()); + LIBCPP_ASSERT(dest.view() == "o wor"); + } +} + int main(int, char**) { test(); #ifndef TEST_HAS_NO_WIDE_CHARACTERS test(); #endif + test_altered_sequence_pointers(); return 0; } diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/str.allocator_propagation.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/str.allocator_propagation.pass.cpp new file mode 100644 index 0000000000000..46a9213eaf919 --- /dev/null +++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/str.allocator_propagation.pass.cpp @@ -0,0 +1,144 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// TODO: Change to XFAIL once https://github.com/llvm/llvm-project/issues/40340 is fixed +// UNSUPPORTED: availability-pmr-missing + +// This test ensures that we properly propagate allocators from stringstream's +// inner string object to the new string returned from .str(). +// `str() const&` is specified to preserve the allocator (not copy the string). +// `str() &&` isn't specified, but should preserve the allocator (move the string). 
+ +#include +#include +#include +#include +#include +#include +#include + +#include "make_string.h" +#include "test_allocator.h" +#include "test_macros.h" + +template +void test_soccc_behavior() { + using Alloc = SocccAllocator; + using SS = std::basic_stringstream, Alloc>; + using S = std::basic_string, Alloc>; + { + SS ss = SS(std::ios_base::out, Alloc(10)); + + // [stringbuf.members]/6 specifies that the allocator is copied, + // not select_on_container_copy_construction'ed. + // + S copied = ss.str(); + assert(copied.get_allocator().count_ == 10); + assert(ss.rdbuf()->get_allocator().count_ == 10); + assert(copied.empty()); + + // sanity-check that SOCCC does in fact work + assert(S(copied).get_allocator().count_ == 11); + + // [stringbuf.members]/10 doesn't specify the allocator to use, + // but copying the allocator as-if-by moving the string makes sense. + // + S moved = std::move(ss).str(); + assert(moved.get_allocator().count_ == 10); + assert(ss.rdbuf()->get_allocator().count_ == 10); + assert(moved.empty()); + } +} + +template , std::pmr::polymorphic_allocator>> +struct StringBuf : Base { + explicit StringBuf(std::pmr::memory_resource* mr) : Base(std::ios_base::in, mr) {} + void public_setg(int a, int b, int c) { + CharT* p = this->eback(); + assert(this->view().data() == p); + this->setg(p + a, p + b, p + c); + assert(this->eback() == p + a); + assert(this->view().data() == p + a); + } +}; + +template +void test_allocation_is_pilfered() { + using SS = std::basic_stringstream, std::pmr::polymorphic_allocator>; + using S = std::pmr::basic_string; + alignas(void*) char buf[80 * sizeof(CharT)]; + const CharT* initial = + MAKE_CSTRING(CharT, "a very long string that exceeds the small string optimization buffer length"); + { + std::pmr::set_default_resource(std::pmr::null_memory_resource()); + auto mr1 = std::pmr::monotonic_buffer_resource(buf, sizeof(buf), std::pmr::null_memory_resource()); + SS ss = SS(S(initial, &mr1)); + S s = std::move(ss).str(); + 
assert(s == initial); + } + { + // Try moving-out-of a stringbuf whose view() is not the entire string. + // This is libc++'s behavior; libstdc++ doesn't allow such stringbufs to be created. + // + std::pmr::set_default_resource(std::pmr::null_memory_resource()); + auto mr1 = std::pmr::monotonic_buffer_resource(buf, sizeof(buf), std::pmr::null_memory_resource()); + auto src = StringBuf(&mr1); + src.str(S(initial, &mr1)); + src.public_setg(2, 6, 40); + SS ss(std::ios_base::in, &mr1); + *ss.rdbuf() = std::move(src); + LIBCPP_ASSERT(ss.view() == std::basic_string_view(initial).substr(2, 38)); + S s = std::move(ss).str(); + LIBCPP_ASSERT(s == std::basic_string_view(initial).substr(2, 38)); + } +} + +template +void test_no_foreign_allocations() { + using SS = std::basic_stringstream, std::pmr::polymorphic_allocator>; + using S = std::pmr::basic_string; + const CharT* initial = + MAKE_CSTRING(CharT, "a very long string that exceeds the small string optimization buffer length"); + { + std::pmr::set_default_resource(std::pmr::null_memory_resource()); + auto mr1 = std::pmr::monotonic_buffer_resource(std::pmr::new_delete_resource()); + auto ss = SS(S(initial, &mr1)); + assert(ss.rdbuf()->get_allocator().resource() == &mr1); + + // [stringbuf.members]/6 specifies that the result of `str() const &` + // does NOT use the default allocator; it uses the original allocator. + // + S copied = ss.str(); + assert(copied.get_allocator().resource() == &mr1); + assert(ss.rdbuf()->get_allocator().resource() == &mr1); + assert(copied == initial); + + // [stringbuf.members]/10 doesn't specify the allocator to use, + // but copying the allocator as-if-by moving the string makes sense. 
+ // + S moved = std::move(ss).str(); + assert(moved.get_allocator().resource() == &mr1); + assert(ss.rdbuf()->get_allocator().resource() == &mr1); + assert(moved == initial); + } +} + +int main(int, char**) { + test_soccc_behavior(); + test_allocation_is_pilfered(); + test_no_foreign_allocations(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test_soccc_behavior(); + test_allocation_is_pilfered(); + test_no_foreign_allocations(); +#endif + + return 0; +} diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/str.move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/str.move.pass.cpp index 35349c9c288ec..56a0d84fb68ed 100644 --- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/str.move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/str.move.pass.cpp @@ -37,6 +37,14 @@ static void test() { assert(s.empty()); assert(ss.view().empty()); } + { + std::basic_stringstream ss( + STR("a very long string that exceeds the small string optimization buffer length")); + const CharT* p = ss.view().data(); + std::basic_string s = std::move(ss).str(); + assert(s.data() == p); // the allocation was pilfered + assert(ss.view().empty()); + } } int main(int, char**) { diff --git a/libcxx/test/support/test_allocator.h b/libcxx/test/support/test_allocator.h index 9330150a83851..3bde73183ab6e 100644 --- a/libcxx/test/support/test_allocator.h +++ b/libcxx/test/support/test_allocator.h @@ -475,4 +475,29 @@ TEST_CONSTEXPR inline bool operator!=(limited_allocator const& LHS, limite return !(LHS == RHS); } +// Track the "provenance" of this allocator instance: how many times was +// select_on_container_copy_construction called in order to produce it? 
+// +template +struct SocccAllocator { + using value_type = T; + + int count_ = 0; + explicit SocccAllocator(int i) : count_(i) {} + + template + SocccAllocator(const SocccAllocator& a) : count_(a.count_) {} + + T* allocate(std::size_t n) { return std::allocator().allocate(n); } + void deallocate(T* p, std::size_t n) { std::allocator().deallocate(p, n); } + + SocccAllocator select_on_container_copy_construction() const { return SocccAllocator(count_ + 1); } + + bool operator==(const SocccAllocator&) const { return true; } + + using propagate_on_container_copy_assignment = std::false_type; + using propagate_on_container_move_assignment = std::false_type; + using propagate_on_container_swap = std::false_type; +}; + #endif // TEST_ALLOCATOR_H From 52db7e27458f774fa0c6c6a864ce197fa071a230 Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Tue, 17 Oct 2023 11:46:47 +0200 Subject: [PATCH 318/720] [mlir][nvgpu] Improve `WarpgroupAccumulator` type to simplify IR (#68728) `WarpgroupAccumulator` (or `!nvgpu.warpgroup.accumulator`) is a type that keeps the accumulator matrix that is used by warp-group level matrix multiplication. It is handy to have a special type for that as the matrix is distributed among the threads of the warp-group. However, current transformations requires to create and use multiple `WarpgroupAccumulator` if the shape of GEMM is larger than the supported shape of `wgmma.mma_async` instruction. This makes IR looks dense. This PR improves the transformation of `WarpgroupAccumulator` type in every nvgpu Op that uses it. 
**Example: Current GEMM in NVGPU-IR** ``` // Init %m1, %m2 = nvgpu.warpgroup.mma.init.accumulator -> !nvgpu.warpgroup.accumulator>, !nvgpu.warpgroup.accumulator> // GEMM %r1, %r2 = nvgpu.warpgroup.mma %descA, %descB, %m1, %m2 {transposeB}: !nvgpu.warpgroup.descriptor>, !nvgpu.warpgroup.descriptor>, !nvgpu.warpgroup.accumulator>, !nvgpu.warpgroup.accumulator> -> !nvgpu.warpgroup.accumulator>, !nvgpu.warpgroup.accumulator> // Epilogue nvgpu.warpgroup.mma.store [%r1, %r2] to %sharedMemoryBuffer : !nvgpu.warpgroup.accumulator>, !nvgpu.warpgroup.accumulator> into memref<128x128xf32,3> ``` **Example: This PR simplifies the IR as below:** ``` // Init %m = nvgpu.warpgroup.mma.init.accumulator -> !nvgpu.warpgroup.accumulator> // GEMM %r1 = nvgpu.warpgroup.mma %descA, %descB, %m1 {transposeB}: !nvgpu.warpgroup.descriptor>, !nvgpu.warpgroup.descriptor>, !nvgpu.warpgroup.accumulator> -> !nvgpu.warpgroup.accumulator> // Epilogue nvgpu.warpgroup.mma.store [%matrixD1, %matrixD2] to %sharedMemoryBuffer : !nvgpu.warpgroup.accumulator>, !nvgpu.warpgroup.accumulator> into memref<128x128xf32,3> ``` --- mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td | 10 +- .../mlir/Dialect/NVGPU/IR/NVGPUDialect.h | 3 + .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 112 +++++++++++------- mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp | 99 ++++++---------- .../NVGPU/TransformOps/NVGPUTransformOps.cpp | 24 +++- .../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 65 +++++----- mlir/test/Dialect/NVGPU/invalid.mlir | 22 ++-- 7 files changed, 177 insertions(+), 158 deletions(-) diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td index 79183acfb71b6..fd16376be3669 100644 --- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td +++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td @@ -719,8 +719,8 @@ def NVGPU_WarpgroupMmaOp : NVGPU_Op<"warpgroup.mma"> { DefaultValuedOptionalAttr:$waitGroup, OptionalAttr:$transposeA, OptionalAttr:$transposeB, - Variadic:$matrixC); - let results = 
(outs Variadic:$matrixD); + NVGPU_WarpgroupAccumulator:$matrixC); + let results = (outs NVGPU_WarpgroupAccumulator:$matrixD); let assemblyFormat = [{ $descriptorA`,` $descriptorB`,` $matrixC attr-dict `:` type($descriptorA) `,` type($descriptorB) `,` type($matrixC) `->` type($matrixD) @@ -739,11 +739,11 @@ def NVGPU_WarpgroupMmaStoreOp : NVGPU_Op<"warpgroup.mma.store"> { Note that, the op must be run with warp group. }]; - let arguments = (ins Variadic:$matrixD, + let arguments = (ins NVGPU_WarpgroupAccumulator:$matrixD, Arg:$dstMemref); let assemblyFormat = [{ - `[` $matrixD `]` `,` $dstMemref attr-dict `:` type($matrixD) `to` type($dstMemref) + $matrixD `,` $dstMemref attr-dict `:` type($matrixD) `to` type($dstMemref) }]; let hasVerifier = 1; } @@ -755,7 +755,7 @@ def NVGPU_WarpgroupMmaInitAccumulatorOp : NVGPU_Op<"warpgroup.mma.init.accumulat This Op generates and initializes the accumulator matrix for `nvgpu.warpgroup.mma` op to perform matrix-multiply-and-accumulate. }]; - let results = (outs Variadic:$matrixC); + let results = (outs NVGPU_WarpgroupAccumulator:$matrixC); let assemblyFormat = "attr-dict `->` type($matrixC)"; let hasVerifier = 1; } diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h index 96af26842dafe..e6bba7e608296 100644 --- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h +++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h @@ -23,6 +23,9 @@ constexpr int kWarpSize = 32; +/// M size of wgmma.mma_async instruction +constexpr int kWgmmaSizeM = 64; + #define GET_ATTRDEF_CLASSES #include "mlir/Dialect/NVGPU/IR/NVGPUAttrDefs.h.inc" diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index 84f53a4572294..2d43230938526 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -412,10 +412,28 @@ struct ConvertNVGPUToNVVMPass return 
converter.convertType(IntegerType::get(type.getContext(), 32)); }); converter.addConversion([&](nvgpu::WarpgroupAccumulatorType type) -> Type { - VectorType vtype = type.getFragmented(); + Type elemType = type.getFragmented().getElementType(); + int64_t sizeM = type.getFragmented().getDimSize(0); + int64_t sizeN = type.getFragmented().getDimSize(1); + + unsigned numMembers; + if (elemType.isF32() || elemType.isInteger(32)) + numMembers = sizeN / 2; + else if (elemType.isF16()) + numMembers = sizeN / 4; + else + llvm_unreachable("unsupported type for warpgroup accumulator"); + + SmallVector innerStructBody; + for (unsigned i = 0; i < numMembers; i++) + innerStructBody.push_back(elemType); + auto innerStructType = + LLVM::LLVMStructType::getLiteral(type.getContext(), innerStructBody); + SmallVector structBody; - for (unsigned i = 0; i < vtype.getDimSize(0); i++) - structBody.push_back(vtype.getElementType()); + for (int i = 0; i < sizeM; i += kWgmmaSizeM) + structBody.push_back(innerStructType); + auto convertedType = LLVM::LLVMStructType::getLiteral(type.getContext(), structBody); return converter.convertType(convertedType); @@ -1186,7 +1204,6 @@ struct NVGPUWarpgroupMmaOpLowering nvgpu::WarpgroupMmaOp op; ImplicitLocOpBuilder b; OpAdaptor adaptor; - const LLVMTypeConverter &typeConverter; // Entire shape of the given Op int64_t totalM, totalN, totalK; @@ -1330,7 +1347,7 @@ struct NVGPUWarpgroupMmaOpLowering /// This function generates a WgmmaMmaAsyncOp using provided GMMA matrix /// descriptors and arranges them based on induction variables: i, j, and k. - Value generateWgmma(int i, int j, int k, Value matrixC, Value matrixD) { + Value generateWgmma(int i, int j, int k, Value matrixC) { LLVM_DEBUG(DBGS() << "\t wgmma." 
<< "m" << wgmmaM << "n" << wgmmaN << "k" << wgmmaK << "(A[" << (iterationM * wgmmaM) << ":" @@ -1359,34 +1376,36 @@ struct NVGPUWarpgroupMmaOpLowering auto overflow = NVVM::MMAIntOverflowAttr::get( op->getContext(), NVVM::MMAIntOverflow::wrapped); - Type resultStructType = typeConverter.convertType(matrixD.getType()); - return b.create( - resultStructType, matrixC, descriptorA, descriptorB, shape, itypeA, + matrixC.getType(), matrixC, descriptorA, descriptorB, shape, itypeA, itypeB, scaleOut, scaleIn, scaleIn, layoutA, layoutB, overflow); } /// Generates multiple wgmma instructions to complete the given GEMM shape - SmallVector generateWgmmaGroup() { - SmallVector wgmmaResults; + Value generateWgmmaGroup() { + Value wgmmaResult = + b.create(adaptor.getMatrixC().getType()); // Perform GEMM + SmallVector wgmmaResults; for (int i = 0; i < iterationM; ++i) { - Value matrixC = adaptor.getMatrixC()[i]; - Value matrixD = op.getMatrixD()[i]; + Value matrixC = b.create(adaptor.getMatrixC(), i); for (int j = 0; j < iterationN; ++j) for (int k = 0; k < iterationK; ++k) - matrixC = generateWgmma(i, j, k, matrixC, matrixD); + matrixC = generateWgmma(i, j, k, matrixC); wgmmaResults.push_back(matrixC); } - - return wgmmaResults; + for (auto [idx, matrix] : llvm::enumerate(wgmmaResults)) { + wgmmaResult = b.create(wgmmaResult.getType(), + wgmmaResult, matrix, idx); + } + return wgmmaResult; } public: WarpgroupGemm(nvgpu::WarpgroupMmaOp op, ImplicitLocOpBuilder &b, - OpAdaptor adaptor, const LLVMTypeConverter &typeConverter) - : op(op), b(b), adaptor(adaptor), typeConverter(typeConverter) { + OpAdaptor adaptor) + : op(op), b(b), adaptor(adaptor) { // Find the entire GEMM Shape totalM = op.getDescriptorA().getType().getTensor().getDimSize(0); totalN = op.getDescriptorB().getType().getTensor().getDimSize(1); @@ -1411,27 +1430,27 @@ struct NVGPUWarpgroupMmaOpLowering /// instructions and group synchronization, as well as waiting /// (WgmmaGroupSyncAlignedOp) for group synchronization 
/// (WgmmaWaitGroupSyncOp) after the instructions. - SmallVector generateWarpgroupMma() { + Value generateWarpgroupMma() { b.create(); - SmallVector wgmmaResults = generateWgmmaGroup(); + Value wgmmaResult = generateWgmmaGroup(); b.create(); b.create(op.getWaitGroup()); - return wgmmaResults; + return wgmmaResult; } }; - LogicalResult matchAndRewrite(nvgpu::WarpgroupMmaOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { ImplicitLocOpBuilder b(op->getLoc(), rewriter); + // Step 1. Build a helper class - WarpgroupGemm warpgroupGemm(op, b, adaptor, *this->getTypeConverter()); + WarpgroupGemm warpgroupGemm(op, b, adaptor); // Step 2. Get the entire GEMM Shape - SmallVector wgmmaResults = warpgroupGemm.generateWarpgroupMma(); + Value wgmmaResult = warpgroupGemm.generateWarpgroupMma(); // Step 3. Replace fragmented result struct with the op results - rewriter.replaceOp(op, wgmmaResults); + rewriter.replaceOp(op, wgmmaResult); return success(); } }; @@ -1535,10 +1554,13 @@ struct NVGPUWarpgroupMmaStoreOpLowering matchAndRewrite(nvgpu::WarpgroupMmaStoreOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { int offset = 0; - ImplicitLocOpBuilder lb(op->getLoc(), rewriter); - for (Value matrixD : adaptor.getMatrixD()) { - auto structType = matrixD.getType().cast(); - storeFragmentedMatrix(lb, matrixD, op.getDstMemref(), offset); + ImplicitLocOpBuilder b(op->getLoc(), rewriter); + Value matriDValue = adaptor.getMatrixD(); + auto stype = matriDValue.getType().cast(); + for (auto [idx, matrixD] : llvm::enumerate(stype.getBody())) { + auto structType = matrixD.cast(); + Value innerStructValue = b.create(matriDValue, idx); + storeFragmentedMatrix(b, innerStructValue, op.getDstMemref(), offset); offset += structType.getBody().size(); } rewriter.eraseOp(op); @@ -1554,23 +1576,27 @@ struct NVGPUWarpgroupMmaInitAccumulatorOpLowering matchAndRewrite(nvgpu::WarpgroupMmaInitAccumulatorOp op, OpAdaptor adaptor, 
ConversionPatternRewriter &rewriter) const override { ImplicitLocOpBuilder b(op->getLoc(), rewriter); - SmallVector results; - for (OpResult m : op.getMatrixC()) { - nvgpu::WarpgroupAccumulatorType mType = - m.getType().cast(); - Type stype = getTypeConverter()->convertType(mType); - Value undefStruct = b.create(stype); - Type elemType = mType.getFragmented().getElementType(); - int64_t elemSize = mType.getFragmented().getDimSize(0); - Value zero = - b.create(elemType, rewriter.getZeroAttr(elemType)); - for (int64_t i = 0; i < elemSize; ++i) { - undefStruct = b.create(stype, undefStruct, zero, - ArrayRef({i})); + LLVM::LLVMStructType structType = + getTypeConverter() + ->convertType(op.getMatrixC().getType()) + .cast(); + Type elemType = structType.getBody() + .front() + .cast() + .getBody() + .front(); + Value zero = b.create(elemType, b.getZeroAttr(elemType)); + Value structValue = b.create(structType); + for (auto [idx, s] : llvm::enumerate(structType.getBody())) { + auto innerStructType = s.cast(); + int ii = idx; + Value innerStructValue = b.create(structValue, ii); + for (unsigned i = 0; i < innerStructType.getBody().size(); ++i) { + innerStructValue = b.create( + innerStructType, innerStructValue, zero, ArrayRef({i})); } - results.push_back(undefStruct); } - rewriter.replaceOp(op, results); + rewriter.replaceOp(op, structValue); return success(); } }; diff --git a/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp b/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp index fe71eae899cd6..f5b02fe1b5155 100644 --- a/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp +++ b/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp @@ -435,7 +435,11 @@ LogicalResult isAllowedWGMMADataType(Type typeD, Type typeA, Type typeB) { return failure(); } -LogicalResult isAllowedSizeM(int sizeM) { return success(sizeM == 64); } +LogicalResult isAllowedSizeM(int sizeM) { + if (sizeM % kWgmmaSizeM) + return failure(); + return success(); +} LogicalResult isAllowedSizeN(int sizeN, Type typeA) { SmallVector allowedN 
= {8, 16, 24, 32, 40, 48, 56, 64, @@ -458,35 +462,16 @@ LogicalResult isAllowedSizeN(int sizeN, Type typeA) { LogicalResult WarpgroupMmaOp::verify() { if (getTransposeA() && !getTransposeB()) - return emitOpError() << "supports non-transpose A (Row Major) " - "and transpose B (Column Major) for the time being"; + return emitOpError() + << "supports non-transpose A (Row Major) " + "and transpose B (Column Major) for the time being "; MemRefType matrixA = getDescriptorA().getType().getTensor(); MemRefType matrixB = getDescriptorB().getType().getTensor(); - VectorType matrixC = getMatrixC() - .front() - .getType() - .cast() - .getFragmented(); - VectorType matrixD = getMatrixD() - .front() - .getType() - .cast() - .getFragmented(); - unsigned sizeAcc = getMatrixC().size(); - - if (getMatrixC().size() != getMatrixD().size()) - return emitOpError() << "number of matrix C and matrix D must be the same"; - - if (llvm::all_of(getMatrixC(), - [&](Value rhs) { return rhs.getType() == matrixC; })) { - return emitOpError() - << "types of all operands in matrix C must be the same"; - } - if (llvm::all_of(getMatrixD(), - [&](Value rhs) { return rhs.getType() == matrixC; })) { - return emitOpError() - << "types of all operands in matrix D must be the same as matrix C"; - } + VectorType matrixC = getMatrixC().getType().getFragmented(); + VectorType matrixD = getMatrixD().getType().getFragmented(); + + if (matrixC != matrixD) + return emitOpError() << "type of matrix C and matrix D must be the same"; if (matrixA.getRank() != 2 || matrixB.getRank() != 2 || matrixC.getRank() != 2 || matrixD.getRank() != 2) { @@ -498,7 +483,7 @@ LogicalResult WarpgroupMmaOp::verify() { return emitOpError() << "2nd dim matrix-A (" << matrixA.getShape()[1] << ")!= 1st dim matrix-B (" << matrixB.getShape()[0] << " )"; - if (matrixA.getShape()[0] != (matrixC.getShape()[0] * sizeAcc)) + if (matrixA.getShape()[0] != matrixC.getShape()[0]) return emitOpError() << "1st dim matrix-A ( " << 
matrixA.getShape()[0] << " )!= 1st dim matrix-C ( " << matrixC.getShape()[0] << " )"; @@ -534,29 +519,16 @@ LogicalResult WarpgroupMmaOp::verify() { LogicalResult WarpgroupMmaStoreOp::verify() { MemRefType dstMemrefType = getDstMemref().getType(); - VectorType firstVtype = getMatrixD() - .front() - .getType() - .cast() - .getFragmented(); - - int64_t totalFirstDimension = 0; - for (Value result : getMatrixD()) { - VectorType vtype = - result.getType().cast().getFragmented(); - if (vtype != firstVtype) - return emitOpError() << "all fragmented types must be the same"; - // Limitation - if (!vtype.getElementType().isF32()) { - return emitOpError() - << "hit a limitation: only f32 results for the time being"; - } - totalFirstDimension += vtype.getDimSize(0); + VectorType vtype = getMatrixD().getType().getFragmented(); + + // Limitation + if (!vtype.getElementType().isF32()) { + return emitOpError() + << "hit a limitation: only f32 results for the time being"; } - if (totalFirstDimension != dstMemrefType.getDimSize(0) || - firstVtype.getDimSize(1) != dstMemrefType.getDimSize(1)) { - return emitOpError() << "results [" << totalFirstDimension << "][" - << firstVtype.getDimSize(1) + if (vtype.getDimSize(0) != dstMemrefType.getDimSize(0) || + vtype.getDimSize(1) != dstMemrefType.getDimSize(1)) { + return emitOpError() << "results [" << vtype << "][" << vtype.getDimSize(1) << "] values. 
However, destination memref[" << dstMemrefType.getDimSize(0) << "][" << dstMemrefType.getDimSize(1) @@ -570,19 +542,18 @@ LogicalResult WarpgroupMmaStoreOp::verify() { //===----------------------------------------------------------------------===// LogicalResult WarpgroupMmaInitAccumulatorOp::verify() { - for (OpResult matrix : getMatrixC()) { - VectorType vectorType = matrix.getType() - .cast() - .getFragmented(); - // Check [M][N] shape - if (failed(isAllowedSizeM(vectorType.getDimSize(0))) || - failed(isAllowedSizeN(vectorType.getDimSize(1), - vectorType.getElementType()))) { - return emitOpError() << "has type " << vectorType - << ". It does not fit into warp-group " - "level (wgmma) matrix multiplication instruction " - "(or not supported yet)"; - } + + nvgpu::WarpgroupAccumulatorType accType = getMatrixC().getType(); + int64_t sizeM = accType.getFragmented().getDimSize(0); + int64_t sizeN = accType.getFragmented().getDimSize(1); + Type elemType = accType.getFragmented().getElementType(); + + if (failed(isAllowedSizeM(sizeM)) || + failed(isAllowedSizeN(sizeN, elemType))) { + return emitOpError() << "has type " << accType.getFragmented() + << ". 
It does not fit into warp-group " + "level (wgmma) matrix multiplication instruction " + "(or not supported yet)"; } return success(); } diff --git a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp index 94d7d565ff1a9..eaaadbbea4d0a 100644 --- a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp +++ b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp @@ -62,10 +62,28 @@ void transform::ApplyNVGPUToNVVMConversionPatternsOp::populatePatterns( }); llvmTypeConverter.addConversion( [&](nvgpu::WarpgroupAccumulatorType type) -> Type { - VectorType vtype = type.getFragmented(); + Type elemType = type.getFragmented().getElementType(); + int64_t sizeM = type.getFragmented().getDimSize(0); + int64_t sizeN = type.getFragmented().getDimSize(1); + + unsigned numMembers; + if (elemType.isF32() || elemType.isInteger(32)) + numMembers = sizeN / 2; + else if (elemType.isF16()) + numMembers = sizeN / 4; + else + llvm_unreachable("unsupported type for warpgroup accumulator"); + + SmallVector innerStructBody; + for (unsigned i = 0; i < numMembers; i++) + innerStructBody.push_back(elemType); + auto innerStructType = LLVM::LLVMStructType::getLiteral( + type.getContext(), innerStructBody); + SmallVector structBody; - for (unsigned i = 0; i < vtype.getDimSize(0); i++) - structBody.push_back(vtype.getElementType()); + for (int i = 0; i < sizeM; i += kWgmmaSizeM) + structBody.push_back(innerStructType); + auto convertedType = LLVM::LLVMStructType::getLiteral(type.getContext(), structBody); return llvmTypeConverter.convertType(convertedType); diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index ca030575e5e96..bf660e2683158 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -713,18 +713,18 @@ func.func @create_wgmma_descriptor(%tensorMap : !tensorMap) -> 
!nvgpu.warpgroup. } // CHECK-LABEL: @warpgroup_mma_128_128_64( -// CHECK-SAME: %[[arg0:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.descriptor>, %[[arg1:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.descriptor>, %[[arg2:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.accumulator>, %[[arg3:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.accumulator>) +// CHECK-SAME: %[[arg0:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.descriptor>, %[[arg1:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.descriptor>, %[[arg2:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.accumulator>) func.func @warpgroup_mma_128_128_64( %descA: !nvgpu.warpgroup.descriptor>, %descB: !nvgpu.warpgroup.descriptor>, - %acc1: !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>>, - %acc2: !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>>) + %acc: !nvgpu.warpgroup.accumulator>) { // CHECK: %[[S0:.+]] = builtin.unrealized_conversion_cast %[[arg0]] : !nvgpu.warpgroup.descriptor> to i64 // CHECK: %[[S1:.+]] = builtin.unrealized_conversion_cast %[[arg1]] : !nvgpu.warpgroup.descriptor> to i64 -// CHECK: %[[S2:.+]] = builtin.unrealized_conversion_cast %[[arg2]] : !nvgpu.warpgroup.accumulator> to !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> -// CHECK: %[[S3:.+]] = builtin.unrealized_conversion_cast %[[arg3]] : !nvgpu.warpgroup.accumulator> to !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: %[[ARG:.+]] = builtin.unrealized_conversion_cast %[[arg2]] : !nvgpu.warpgroup.accumulator> to 
!llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> // CHECK: nvvm.wgmma.fence.aligned +// CHECK: %[[UD:.+]] = llvm.mlir.undef : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S2:.+]] = llvm.extractvalue %[[ARG]][0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> // CHECK: %[[S4:.+]] = nvvm.wgmma.mma_async %[[S0]], %[[S1]], , D[%[[S2]], , ], A[, , ], B[, , ] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> // CHECK: %[[S5:.+]] = llvm.mlir.constant(2 : i32) : i64 // CHECK: %[[S6:.+]] = llvm.add %[[S0]], %[[S5]] : i64 @@ -741,6 +741,7 @@ func.func @warpgroup_mma_128_128_64( // CHECK: %[[S17:.+]] = llvm.mlir.constant(384 : i32) : i64 // CHECK: %[[S18:.+]] = llvm.add %[[S1]], %[[S17]] : i64 // CHECK: %[[S19:.+]] = nvvm.wgmma.mma_async %[[S16]], %[[S18]], , D[%[[S14]], , ], A[, , ], B[, , ] : !llvm.struct +// CHECK: %[[S3:.+]] = llvm.extractvalue %[[ARG]][1] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> // CHECK: %[[S21:.+]] = llvm.mlir.constant(512 : i32) : i64 // CHECK: %[[S22:.+]] = llvm.add %[[S0]], %[[S21]] : i64 // CHECK: %[[S23:.+]] = nvvm.wgmma.mma_async %[[S22]], %[[S1]], , D[%[[S3]], , ], A[, , ], B[, , ] : !llvm.struct @@ -759,27 +760,26 @@ func.func @warpgroup_mma_128_128_64( // CHECK: %[[S36:.+]] = llvm.mlir.constant(384 : i32) : i64 // CHECK: %[[S37:.+]] = llvm.add %[[S1]], %[[S36]] : i64 // CHECK: %[[S38:.+]] = nvvm.wgmma.mma_async %[[S35]], %[[S37]], , D[%[[S33]], , ], A[, , ], B[, , ] : !llvm.struct +// CHECK: %[[S40:.+]] = llvm.insertvalue %[[S19]], %[[UD]][0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S41:.+]] = llvm.insertvalue %[[S38]], %[[S40]][1] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> // CHECK: nvvm.wgmma.commit.group.sync.aligned // CHECK: nvvm.wgmma.wait.group.sync.aligned 1 - %wgmmaResult, %wgmmaResult2 = nvgpu.warpgroup.mma %descA, %descB, %acc1, %acc2 {transposeB}: + %wgmmaResult = nvgpu.warpgroup.mma %descA, %descB, %acc {transposeB}: !nvgpu.warpgroup.descriptor>, !nvgpu.warpgroup.descriptor>, - !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>>, - !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>> + !nvgpu.warpgroup.accumulator> -> - !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>>, - !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>> + !nvgpu.warpgroup.accumulator> return } // CHECK-LABEL: @warpgroup_mma_store( -// CHECK-SAME: %[[arg0:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.accumulator>, %[[arg1:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.accumulator>, %[[arg2:[a-zA-Z0-9_]+]]: memref<128x128xf32, 3>) +// CHECK-SAME: %[[arg0:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.accumulator>, %[[arg2:[a-zA-Z0-9_]+]]: memref<128x128xf32, 3>) func.func @warpgroup_mma_store( - %result1 : !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>>, - %result2 : !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>>, + %result : !nvgpu.warpgroup.accumulator>, %matrixD: memref<128x128xf32,3>) { -// CHECK: %[[S0:.+]] = builtin.unrealized_conversion_cast %[[arg0]] : !nvgpu.warpgroup.accumulator> to !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> -// CHECK: %[[S1:.+]] = builtin.unrealized_conversion_cast %[[arg1]] : !nvgpu.warpgroup.accumulator> to !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: %[[S0:.+]] = builtin.unrealized_conversion_cast %[[arg0]] : !nvgpu.warpgroup.accumulator> to !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[EX1:.+]] = llvm.extractvalue %[[S0]][0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> // CHECK: %[[S6:.+]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[S5:.+]] = llvm.mlir.constant(2 : i32) : i32 // CHECK: %[[S2:.+]] = llvm.mlir.constant(4 : i32) : i32 @@ -807,8 +807,8 @@ func.func @warpgroup_mma_store( // CHECK: %[[S23:.+]] = arith.index_cast %[[S21]] : i32 to 
index // CHECK: %[[S24:.+]] = llvm.add %[[S21]], %[[S6]] : i32 // CHECK: %[[S25:.+]] = arith.index_cast %[[S24]] : i32 to index -// CHECK: %[[S26:.+]] = llvm.extractvalue %[[S0]][0] : !llvm.struct -// CHECK: %[[S27:.+]] = llvm.extractvalue %[[S0]][1] : !llvm.struct +// CHECK: %[[S26:.+]] = llvm.extractvalue %[[EX1]][0] : !llvm.struct +// CHECK: %[[S27:.+]] = llvm.extractvalue %[[EX1]][1] : !llvm.struct // CHECK: memref.store %[[S26]], %[[arg2]][%[[S22]], %[[S23]]] : memref<128x128xf32, 3> // CHECK: memref.store %[[S27]], %[[arg2]][%[[S22]], %[[S25]]] : memref<128x128xf32, 3> @@ -821,8 +821,8 @@ func.func @warpgroup_mma_store( // CHECK: %[[S32:.+]] = arith.index_cast %[[S30]] : i32 to index // CHECK: %[[S33:.+]] = llvm.add %[[S30]], %[[S6]] : i32 // CHECK: %[[S34:.+]] = arith.index_cast %[[S33]] : i32 to index -// CHECK: %[[S35:.+]] = llvm.extractvalue %[[S0]][4] : !llvm.struct< -// CHECK: %[[S36:.+]] = llvm.extractvalue %[[S0]][5] : !llvm.struct< +// CHECK: %[[S35:.+]] = llvm.extractvalue %[[EX1]][4] : !llvm.struct< +// CHECK: %[[S36:.+]] = llvm.extractvalue %[[EX1]][5] : !llvm.struct< // CHECK: memref.store %[[S35]], %[[arg2]][%[[S31]], %[[S32]]] : memref<128x128xf32, 3> // CHECK: memref.store %[[S36]], %[[arg2]][%[[S31]], %[[S34]]] : memref<128x128xf32, 3> @@ -835,8 +835,8 @@ func.func @warpgroup_mma_store( // CHECK: %[[S41:.+]] = arith.index_cast %[[S39]] : i32 to index // CHECK: %[[S42:.+]] = llvm.add %[[S39]], %[[S6]] : i32 // CHECK: %[[S43:.+]] = arith.index_cast %[[S42]] : i32 to index -// CHECK: %[[S44:.+]] = llvm.extractvalue %[[S0]][8] : !llvm.struct< -// CHECK: %[[S45:.+]] = llvm.extractvalue %[[S0]][9] : !llvm.struct< +// CHECK: %[[S44:.+]] = llvm.extractvalue %[[EX1]][8] : !llvm.struct< +// CHECK: %[[S45:.+]] = llvm.extractvalue %[[EX1]][9] : !llvm.struct< // CHECK: memref.store %[[S44]], %[[arg2]][%[[S40]], %[[S41]]] : memref<128x128xf32, 3> // CHECK: memref.store %[[S45]], %[[arg2]][%[[S40]], %[[S43]]] : memref<128x128xf32, 3> @@ -849,8 +849,8 @@ 
func.func @warpgroup_mma_store( // CHECK: %[[S50:.+]] = arith.index_cast %[[S48]] : i32 to index // CHECK: %[[S51:.+]] = llvm.add %[[S48]], %[[S6]] : i32 // CHECK: %[[S52:.+]] = arith.index_cast %[[S51]] : i32 to index -// CHECK: %[[S53:.+]] = llvm.extractvalue %[[S0]][12] : !llvm.struct< -// CHECK: %[[S54:.+]] = llvm.extractvalue %[[S0]][13] : !llvm.struct< +// CHECK: %[[S53:.+]] = llvm.extractvalue %[[EX1]][12] : !llvm.struct< +// CHECK: %[[S54:.+]] = llvm.extractvalue %[[EX1]][13] : !llvm.struct< // CHECK: memref.store %[[S53]], %[[arg2]][%[[S49]], %[[S50]]] : memref<128x128xf32, 3> // CHECK: memref.store %[[S54]], %[[arg2]][%[[S49]], %[[S52]]] : memref<128x128xf32, 3> @@ -860,7 +860,7 @@ func.func @warpgroup_mma_store( // CHECK: %[[c2:.+]] = llvm.mlir.constant(2 : i32) : i32 // ### Store {d64, d65} of each thread ### - +// CHECK: %[[EX2:.+]] = llvm.extractvalue %[[S0]][1] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> // CHECK: %[[S315:.+]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[S312:.+]] = llvm.mlir.constant(2 : i32) : i32 // CHECK: %[[S311:.+]] = llvm.mlir.constant(4 : i32) : i32 @@ -887,24 +887,24 @@ func.func @warpgroup_mma_store( // CHECK: %[[S334:.+]] = arith.index_cast %[[S332]] : i32 to index // CHECK: %[[S335:.+]] = llvm.add %[[S332]], %[[S315]] : i32 // CHECK: %[[S336:.+]] = arith.index_cast %[[S335]] : i32 to index -// 
CHECK: %[[S337:.+]] = llvm.extractvalue %[[S1]][0] -// CHECK: %[[S338:.+]] = llvm.extractvalue %[[S1]][1] +// CHECK: %[[S337:.+]] = llvm.extractvalue %[[EX2]][0] +// CHECK: %[[S338:.+]] = llvm.extractvalue %[[EX2]][1] // CHECK: memref.store %[[S337]], %[[arg2]][%[[S333]], %[[S334]]] : memref<128x128xf32, 3> // CHECK: memref.store %[[S338]], %[[arg2]][%[[S333]], %[[S336]]] : memref<128x128xf32, 3> // Pattern continues similarly 31x times until {... d126, d127} - nvgpu.warpgroup.mma.store [%result1, %result2], %matrixD : - !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>>, - !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>> + nvgpu.warpgroup.mma.store %result, %matrixD : + !nvgpu.warpgroup.accumulator< fragmented = vector<128x128xf32>> to memref<128x128xf32,3> return } func.func @warpgroup_mma_init() { - //CHECK: %[[S0:.+]] = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> //CHECK: %[[S1:.+]] = llvm.mlir.constant(0.000000e+00 : f32) : f3 - //CHECK: %[[S2:.+]] = llvm.insertvalue %[[S1]], %[[S0]][0] : !llvm.struct + //CHECK: %[[S0:.+]] = llvm.mlir.undef : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> + //CHECK: %[[EX:.+]] = llvm.extractvalue %[[S0]][0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> + //CHECK: %[[S2:.+]] = llvm.insertvalue %[[S1]], %[[EX]][0] : !llvm.struct //CHECK: %[[S3:.+]] = llvm.insertvalue %[[S1]], %[[S2]][1] : !llvm.struct //CHECK: %[[S4:.+]] = llvm.insertvalue %[[S1]], %[[S3]][2] : !llvm.struct //CHECK: %[[S5:.+]] = llvm.insertvalue %[[S1]], %[[S4]][3] : !llvm.struct @@ -968,10 +968,11 @@ func.func @warpgroup_mma_init() { //CHECK: %[[S63:.+]] = llvm.insertvalue %[[S1]], %[[S62]][61] : !llvm.struct //CHECK: %[[S64:.+]] = llvm.insertvalue %[[S1]], %[[S63]][62] : !llvm.struct //CHECK: %[[S65:.+]] = llvm.insertvalue %[[S1]], %[[S64]][63] : !llvm.struct - %matrixC = nvgpu.warpgroup.mma.init.accumulator -> !nvgpu.warpgroup.accumulator< fragmented = vector<64x128xf32>> + %matrixC = nvgpu.warpgroup.mma.init.accumulator -> !nvgpu.warpgroup.accumulator< fragmented = vector<128x128xf32>> return } + transform.sequence failures(propagate) { ^bb1(%arg1: !transform.any_op): %0 = transform.structured.match ops{["func.func"]} in %arg1 diff --git a/mlir/test/Dialect/NVGPU/invalid.mlir b/mlir/test/Dialect/NVGPU/invalid.mlir index 66652070ec15f..41b29fa74b125 100644 --- a/mlir/test/Dialect/NVGPU/invalid.mlir +++ b/mlir/test/Dialect/NVGPU/invalid.mlir @@ -224,13 
+224,13 @@ func.func @async_cp_size_invalid_f64( // ----- -!tResult = !nvgpu.warpgroup.accumulator> +!tResult = !nvgpu.warpgroup.accumulator> !tDescA = !nvgpu.warpgroup.descriptor> !tDescB = !nvgpu.warpgroup.descriptor> -func.func @warpgroup_mma_wrong_input(%descA: !tDescA, %descB: !tDescB, %acc1: !tResult, %acc2: !tResult) { +func.func @warpgroup_mma_wrong_input(%descA: !tDescA, %descB: !tDescB, %acc: !tResult) { // expected-error @+1 {{'nvgpu.warpgroup.mma' op 2nd dim matrix-B ( 121 ) != 2nd dim matrix-C ( 128 )}} - %0:2 = nvgpu.warpgroup.mma %descA, %descB, %acc1, %acc1: !tDescA, !tDescB, !tResult, !tResult -> !tResult, !tResult + %0 = nvgpu.warpgroup.mma %descA, %descB, %acc: !tDescA, !tDescB, !tResult -> !tResult return } @@ -239,29 +239,29 @@ func.func @warpgroup_mma_wrong_input(%descA: !tDescA, %descB: !tDescB, %acc1: !t !tResult = !nvgpu.warpgroup.accumulator> !tDescA = !nvgpu.warpgroup.descriptor> !tDescB = !nvgpu.warpgroup.descriptor> -func.func @warpgroup_mma_wrong_input(%descA: !tDescA, %descB: !tDescB, %acc1: !tResult, %acc2: !tResult) { +func.func @warpgroup_mma_wrong_input(%descA: !tDescA, %descB: !tDescB, %acc: !tResult) { // expected-error @+1 {{'nvgpu.warpgroup.mma' op has matrices A, B, C and D, they must be 2 dimensional}} - %0:2 = nvgpu.warpgroup.mma %descA, %descB, %acc1, %acc1: !tDescA, !tDescB, !tResult, !tResult -> !tResult, !tResult + %0 = nvgpu.warpgroup.mma %descA, %descB, %acc: !tDescA, !tDescB, !tResult -> !tResult return } // ----- -!tResult = !nvgpu.warpgroup.accumulator> +!tResult = !nvgpu.warpgroup.accumulator> !tDescA = !nvgpu.warpgroup.descriptor> !tDescB = !nvgpu.warpgroup.descriptor> -func.func @warpgroup_mma_wrong_input(%descA: !tDescA, %descB: !tDescB, %acc1: !tResult, %acc2: !tResult) { +func.func @warpgroup_mma_wrong_input(%descA: !tDescA, %descB: !tDescB, %acc: !tResult) { // expected-error @+1 {{'nvgpu.warpgroup.mma' op 'f32' += 'f16' * 'f32', it is not supported.}} - %0:2 = nvgpu.warpgroup.mma %descA, %descB, %acc1, 
%acc1: !tDescA, !tDescB, !tResult, !tResult -> !tResult, !tResult + %0 = nvgpu.warpgroup.mma %descA, %descB, %acc: !tDescA, !tDescB, !tResult -> !tResult return } // ----- -!tResult = !nvgpu.warpgroup.accumulator> +!tResult = !nvgpu.warpgroup.accumulator> !tDescA = !nvgpu.warpgroup.descriptor> !tDescB = !nvgpu.warpgroup.descriptor> -func.func @warpgroup_mma_wrong_input(%descA: !tDescA, %descB: !tDescB, %acc1: !tResult, %acc2: !tResult) { +func.func @warpgroup_mma_wrong_input(%descA: !tDescA, %descB: !tDescB, %acc: !tResult) { // expected-error @+1 {{'nvgpu.warpgroup.mma' op 2nd dim matrix-B ( 512 ) != 2nd dim matrix-C ( 128 )}} - %0:2 = nvgpu.warpgroup.mma %descA, %descB, %acc1, %acc1: !tDescA, !tDescB, !tResult, !tResult -> !tResult, !tResult + %0 = nvgpu.warpgroup.mma %descA, %descB, %acc: !tDescA, !tDescB, !tResult -> !tResult return } From bea3684944c0d7962cd53ab77aad756cfee76b7c Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 17 Oct 2023 11:30:14 +0100 Subject: [PATCH 319/720] [AArch64] Allow only LSL to be folded into addressing mode (#69235) There was an error in decoding shift type, which permitted shift types other than LSL to be (incorrectly) folded into the addressing mode of a load/store instruction. 
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 5 +- .../GlobalISel/sink-and-fold-illegal-shift.ll | 17 ++++ .../AArch64/sink-and-fold-illegal-shift.mir | 95 +++++++++++++++++++ 3 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/sink-and-fold-illegal-shift.ll create mode 100644 llvm/test/CodeGen/AArch64/sink-and-fold-illegal-shift.mir diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index e03a94de007c9..8f0e272a6fac7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2978,7 +2978,10 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, // Don't fold the add if the result would be slower, unless optimising for // size. - int64_t Shift = AddrI.getOperand(3).getImm(); + unsigned Shift = static_cast(AddrI.getOperand(3).getImm()); + if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL) + return false; + Shift = AArch64_AM::getShiftValue(Shift); if (!OptSize) { if ((Shift != 2 && Shift != 3) || !Subtarget.hasAddrLSLFast()) return false; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sink-and-fold-illegal-shift.ll b/llvm/test/CodeGen/AArch64/GlobalISel/sink-and-fold-illegal-shift.ll new file mode 100644 index 0000000000000..b9892fc31bedb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/sink-and-fold-illegal-shift.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -global-isel --aarch64-enable-sink-fold=true < %s | FileCheck %s + +target triple = "aarch64-linux" + +; Test a non-LSL shift cannot be folded into the addressing mode. 
+define void @f(ptr %p, i64 %i) optsize { +; CHECK-LABEL: f: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, x1, asr #32 +; CHECK-NEXT: strb wzr, [x8] +; CHECK-NEXT: ret + %d = ashr i64 %i, 32 + %a = getelementptr i8, ptr %p, i64 %d + store i8 0, ptr %a + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold-illegal-shift.mir b/llvm/test/CodeGen/AArch64/sink-and-fold-illegal-shift.mir new file mode 100644 index 0000000000000..d2f6a3ab1aeeb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sink-and-fold-illegal-shift.mir @@ -0,0 +1,95 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc --run-pass=machine-sink --aarch64-enable-sink-fold=true %s -o - | FileCheck %s +--- | + source_filename = "../llvm/test/CodeGen/AArch64/GlobalISel/sink-and-fold-illegal-shift.ll" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-linux" + + define void @f(ptr %p, i64 %i) #0 { + %d = ashr i64 %i, 32 + %a = getelementptr i8, ptr %p, i64 %d + store i8 0, ptr %a, align 1 + ret void + } + + attributes #0 = { optsize } + +... 
+--- +name: f +alignment: 4 +exposesReturnsTwice: false +legalized: true +regBankSelected: true +selected: true +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: gpr64, preferred-register: '' } + - { id: 1, class: gpr64, preferred-register: '' } + - { id: 2, class: gpr, preferred-register: '' } + - { id: 3, class: gpr, preferred-register: '' } + - { id: 4, class: gpr64common, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } + - { id: 6, class: gpr, preferred-register: '' } + - { id: 7, class: gpr64, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '' } + - { reg: '$x1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0, $x1 + + ; CHECK-LABEL: name: f + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY]], [[COPY1]], 160 + ; CHECK-NEXT: STRBBui $wzr, [[ADDXrs]], 0 :: (store (s8) into %ir.a) + ; CHECK-NEXT: RET_ReallyLR + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY $x1 + %4:gpr64common = ADDXrs %0, %1, 160 + STRBBui $wzr, %4, 
0 :: (store (s8) into %ir.a) + RET_ReallyLR + +... From 22e3bf4eaf6cbbd387a3789e7ee082434e62d072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ingo=20M=C3=BCller?= Date: Tue, 17 Oct 2023 12:32:16 +0200 Subject: [PATCH 320/720] [mlir][transform] Fix new interpreter and library preloading passes. (#69190) This PR fixes the two recently added passes from #68661, which were non-functional and untested. In particular: * The passes did not declare their dependent dialects, so they could not run at all in the most simple cases. * The mechanism of loading the library module in the initialization of the interpreter pass is broken by design (but, fortunately, also not necessary). This is because the initialization of all passes happens before the execution of any other pass, so the "preload library" pass has not run yet at the time the interpreter pass gets initialized. Instead, the library is now loaded every time the interpreter pass is run. This should not be exceedingly expensive, since it only consists of looking up the library in the dialect. Also, this removes the library module from the pass state, making it possible in the future to preload libraries in several passes. * The PR adds tests for the two passes, which were completely untested previously. 
--- .../Dialect/Transform/Transforms/Passes.td | 4 +++- .../Transform/Transforms/InterpreterPass.cpp | 15 +++------------ .../Transform/interpreter-entry-point.mlir | 17 +++++++++++++++++ mlir/test/Dialect/Transform/interpreter.mlir | 17 +++++++++++++++++ .../Dialect/Transform/preload-library.mlir | 19 +++++++++++++++++++ 5 files changed, 59 insertions(+), 13 deletions(-) create mode 100644 mlir/test/Dialect/Transform/interpreter-entry-point.mlir create mode 100644 mlir/test/Dialect/Transform/interpreter.mlir create mode 100644 mlir/test/Dialect/Transform/preload-library.mlir diff --git a/mlir/include/mlir/Dialect/Transform/Transforms/Passes.td b/mlir/include/mlir/Dialect/Transform/Transforms/Passes.td index c900fee76b814..286f69bc52486 100644 --- a/mlir/include/mlir/Dialect/Transform/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Transform/Transforms/Passes.td @@ -51,8 +51,9 @@ def PreloadLibraryPass : Pass<"transform-preload-library"> { Warning: Only a single such pass should exist for a given MLIR context. This is a temporary solution until a resource-based solution is available. - TODO: investigate using a resource blob if some ownership mode allows it. }]; + // TODO: investigate using a resource blob if some ownership mode allows it. + let dependentDialects = ["::mlir::transform::TransformDialect"]; let options = [ ListOption<"transformLibraryPaths", "transform-library-paths", "std::string", "Optional paths to files with modules that should be merged into the " @@ -67,6 +68,7 @@ def InterpreterPass : Pass<"transform-interpreter"> { sequence transformation specified by the provided name (defaults to `__transform_main`). 
}]; + let dependentDialects = ["::mlir::transform::TransformDialect"]; let options = [ Option<"entryPoint", "entry-point", "std::string", /*default=*/[{"__transform_main"}], diff --git a/mlir/lib/Dialect/Transform/Transforms/InterpreterPass.cpp b/mlir/lib/Dialect/Transform/Transforms/InterpreterPass.cpp index f473d5aa728c5..3ec51d88729a0 100644 --- a/mlir/lib/Dialect/Transform/Transforms/InterpreterPass.cpp +++ b/mlir/lib/Dialect/Transform/Transforms/InterpreterPass.cpp @@ -25,13 +25,10 @@ class InterpreterPass public: using Base::Base; - LogicalResult initialize(MLIRContext *context) override { - // TODO: investigate using a resource blob if some ownership mode allows it. - transformModule = transform::detail::getPreloadedTransformModule(context); - return success(); - } - void runOnOperation() override { + MLIRContext *context = &getContext(); + ModuleOp transformModule = + transform::detail::getPreloadedTransformModule(context); if (failed(transform::applyTransformNamedSequence( getOperation(), transformModule, options.enableExpensiveChecks(true), entryPoint))) @@ -41,11 +38,5 @@ class InterpreterPass private: /// Transform interpreter options. transform::TransformOptions options; - - /// The separate transform module to be used for transformations, shared - /// across multiple instances of the pass if it is applied in parallel to - /// avoid potentially expensive cloning. MUST NOT be modified after the pass - /// has been initialized. 
- ModuleOp transformModule; }; } // namespace diff --git a/mlir/test/Dialect/Transform/interpreter-entry-point.mlir b/mlir/test/Dialect/Transform/interpreter-entry-point.mlir new file mode 100644 index 0000000000000..ccd9bef3d506d --- /dev/null +++ b/mlir/test/Dialect/Transform/interpreter-entry-point.mlir @@ -0,0 +1,17 @@ +// RUN: mlir-opt %s -transform-interpreter=entry-point=entry_point \ +// RUN: -split-input-file -verify-diagnostics + +module attributes { transform.with_named_sequence } { + transform.named_sequence @entry_point(!transform.any_op {transform.readonly}) { + ^bb0(%arg0: !transform.any_op): + // expected-remark @below {{applying transformation}} + transform.test_transform_op + transform.yield + } + + transform.named_sequence @__transform_main(!transform.any_op {transform.readonly}) { + ^bb0(%arg0: !transform.any_op): + transform.test_transform_op // Note: does not yield remark. + transform.yield + } +} diff --git a/mlir/test/Dialect/Transform/interpreter.mlir b/mlir/test/Dialect/Transform/interpreter.mlir new file mode 100644 index 0000000000000..bb41420bef4d6 --- /dev/null +++ b/mlir/test/Dialect/Transform/interpreter.mlir @@ -0,0 +1,17 @@ +// RUN: mlir-opt %s -transform-interpreter \ +// RUN: -split-input-file -verify-diagnostics + +module attributes { transform.with_named_sequence } { + transform.named_sequence @__transform_main(!transform.any_op {transform.readonly}) { + ^bb0(%arg0: !transform.any_op): + // expected-remark @below {{applying transformation}} + transform.test_transform_op + transform.yield + } + + transform.named_sequence @entry_point(!transform.any_op {transform.readonly}) { + ^bb0(%arg0: !transform.any_op): + transform.test_transform_op // Note: does not yield remark. 
+ transform.yield + } +} diff --git a/mlir/test/Dialect/Transform/preload-library.mlir b/mlir/test/Dialect/Transform/preload-library.mlir new file mode 100644 index 0000000000000..61d22252dc61d --- /dev/null +++ b/mlir/test/Dialect/Transform/preload-library.mlir @@ -0,0 +1,19 @@ +// RUN: mlir-opt %s \ +// RUN: -transform-preload-library=transform-library-paths=%p%{fs-sep}test-interpreter-library \ +// RUN: -transform-interpreter=entry-point=private_helper \ +// RUN: -split-input-file -verify-diagnostics + +// expected-remark @below {{message}} +module {} + +// ----- + +// Note: no remark here since local entry point takes precedence. +module attributes { transform.with_named_sequence } { + transform.named_sequence @private_helper(!transform.any_op {transform.readonly}) { + ^bb0(%arg0: !transform.any_op): + // expected-remark @below {{applying transformation}} + transform.test_transform_op + transform.yield + } +} From be9bc542186f92be2e644d2a3d506a3c9325ca3c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 17 Oct 2023 11:31:45 +0100 Subject: [PATCH 321/720] [X86] vselect.ll - add vXi8 select-by-constant tests with repeated/broadcastable shuffle mask --- llvm/test/CodeGen/X86/vselect.ll | 77 +++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll index 0c57f497aa8aa..784d32bde1b5b 100644 --- a/llvm/test/CodeGen/X86/vselect.ll +++ b/llvm/test/CodeGen/X86/vselect.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX +; RUN: 
llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 ; Verify that we don't emit packed vector shifts instructions if the ; condition used by the vector select is a vector of constants. @@ -425,6 +425,79 @@ define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) { ret <2 x i64> %1 } +define <16 x i8> @test26(<16 x i8> %a, <16 x i8> %b) { +; SSE2-LABEL: test26: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-NEXT: andps %xmm2, %xmm1 +; SSE2-NEXT: andnps %xmm0, %xmm2 +; SSE2-NEXT: orps %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test26: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: test26: +; AVX1: # %bb.0: +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test26: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq + %1 = select <16 x i1> , <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %1 +} + +define <32 x i8> @test27(<32 x i8> %a, <32 x i8> %b) { +; SSE2-LABEL: test27: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps {{.*#+}} xmm4 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255] +; SSE2-NEXT: movaps %xmm4, %xmm5 +; SSE2-NEXT: andnps %xmm2, %xmm5 +; SSE2-NEXT: andps %xmm4, %xmm0 +; SSE2-NEXT: orps %xmm5, %xmm0 +; SSE2-NEXT: andps %xmm4, %xmm1 +; SSE2-NEXT: andnps %xmm3, %xmm4 +; SSE2-NEXT: orps %xmm4, %xmm1 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test27: +; SSE41: # 
%bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm4 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255] +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm3 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: movdqa %xmm3, %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: test27: +; AVX1: # %bb.0: +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255] +; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test27: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255] +; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq + %1 = select <32 x i1> , <32 x i8> %a, <32 x i8> %b + ret <32 x i8> %1 +} + define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) { ; SSE-LABEL: select_of_shuffles_0: ; SSE: # %bb.0: From dd5d65adb6413122a5ba1ed04c5c2c0b4951b76c Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 17 Oct 2023 11:41:36 +0100 Subject: [PATCH 322/720] [HIP][Clang][CodeGen] Add CodeGen support for `hipstdpar` This patch adds the CodeGen changes needed for enabling HIP parallel algorithm offload on AMDGPU targets. This change relaxes restrictions on what gets emitted on the device path, when compiling in `hipstdpar` mode: 1. Unless a function is explicitly marked `__host__`, it will get emitted, whereas before only `__device__` and `__global__` functions would be emitted; 2. Unsupported builtins are ignored as opposed to being marked as an error, as the decision on their validity is deferred to the `hipstdpar` specific code selection pass; 3. 
We add a `hipstdpar` specific pass to the opt pipeline, independent of optimisation level: - When compiling for the host, iff the user requested it via the `--hipstdpar-interpose-alloc` flag, we add a pass which replaces canonical allocation / deallocation functions with accelerator aware equivalents. A test to validate that unannotated functions get correctly emitted is added as well. Reviewed by: yaxunl, efriedma Differential Revision: https://reviews.llvm.org/D155850 --- clang/lib/CodeGen/BackendUtil.cpp | 5 +++ clang/lib/CodeGen/CGBuiltin.cpp | 26 +++++++++++++ clang/lib/CodeGen/CGStmt.cpp | 37 +++++++++++++++++-- clang/lib/CodeGen/CMakeLists.txt | 1 + clang/lib/CodeGen/CodeGenFunction.cpp | 12 ++++-- clang/lib/CodeGen/CodeGenModule.cpp | 7 +++- .../unannotated-functions-get-emitted.cpp | 19 ++++++++++ .../test/CodeGenHipStdPar/unsupported-ASM.cpp | 10 +++++ .../CodeGenHipStdPar/unsupported-builtins.cpp | 8 ++++ 9 files changed, 116 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp create mode 100644 clang/test/CodeGenHipStdPar/unsupported-ASM.cpp create mode 100644 clang/test/CodeGenHipStdPar/unsupported-builtins.cpp diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index d066819871dfd..70accce456d3c 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -78,6 +78,7 @@ #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/JumpThreading.h" +#include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -1108,6 +1109,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline( return; } + if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice && + LangOpts.HIPStdParInterposeAlloc) + MPM.addPass(HipStdParAllocationInterpositionPass()); + // Now 
that we have all of the passes ready, run them. { PrettyStackTraceString CrashInfo("Optimizer"); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4d86e8a769846..43ace3e11e610 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2327,6 +2327,19 @@ static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, return nullptr; } +static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, + const FunctionDecl *FD) { + auto Name = FD->getNameAsString() + "__hipstdpar_unsupported"; + auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD); + auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); + + SmallVector Args; + for (auto &&FormalTy : FnTy->params()) + Args.push_back(llvm::PoisonValue::get(FormalTy)); + + return RValue::get(CGF->Builder.CreateCall(UBF, Args)); +} + RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -5765,6 +5778,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); } + if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice) + return EmitHipStdParUnsupportedBuiltin(this, FD); + ErrorUnsupported(E, "builtin function"); // Unknown builtin, for now just dump it out and return undef. @@ -5775,6 +5791,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch) { + // When compiling in HipStdPar mode we have to be conservative in rejecting + // target specific features in the FE, and defer the possible error to the + // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is + // referenced by an accelerator executable function, we emit an error. + // Returning nullptr here leads to the builtin being handled in + // EmitStdParUnsupportedBuiltin. 
+ if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice && + Arch != CGF->getTarget().getTriple().getArch()) + return nullptr; + switch (Arch) { case llvm::Triple::arm: case llvm::Triple::armeb: diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 6674aa2409a59..c719df1bfa050 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2420,6 +2420,24 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, } } +static void EmitHipStdParUnsupportedAsm(CodeGenFunction *CGF, + const AsmStmt &S) { + constexpr auto Name = "__ASM__hipstdpar_unsupported"; + + StringRef Asm; + if (auto GCCAsm = dyn_cast(&S)) + Asm = GCCAsm->getAsmString()->getString(); + + auto &Ctx = CGF->CGM.getLLVMContext(); + + auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm); + auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), + {StrTy->getType()}, false); + auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); + + CGF->Builder.CreateCall(UBF, {StrTy}); +} + void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Pop all cleanup blocks at the end of the asm statement. 
CodeGenFunction::RunCleanupsScope Cleanups(*this); @@ -2431,27 +2449,38 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { SmallVector OutputConstraintInfos; SmallVector InputConstraintInfos; - for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) { + bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice; + bool IsValidTargetAsm = true; + for (unsigned i = 0, e = S.getNumOutputs(); i != e && IsValidTargetAsm; i++) { StringRef Name; if (const GCCAsmStmt *GAS = dyn_cast(&S)) Name = GAS->getOutputName(i); TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name); bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid; - assert(IsValid && "Failed to parse output constraint"); + if (IsHipStdPar && !IsValid) + IsValidTargetAsm = false; + else + assert(IsValid && "Failed to parse output constraint"); OutputConstraintInfos.push_back(Info); } - for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) { + for (unsigned i = 0, e = S.getNumInputs(); i != e && IsValidTargetAsm; i++) { StringRef Name; if (const GCCAsmStmt *GAS = dyn_cast(&S)) Name = GAS->getInputName(i); TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name); bool IsValid = getTarget().validateInputConstraint(OutputConstraintInfos, Info); - assert(IsValid && "Failed to parse input constraint"); (void)IsValid; + if (IsHipStdPar && !IsValid) + IsValidTargetAsm = false; + else + assert(IsValid && "Failed to parse input constraint"); InputConstraintInfos.push_back(Info); } + if (!IsValidTargetAsm) + return EmitHipStdParUnsupportedAsm(this, S); + std::string Constraints; std::vector ResultRegDests; diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index 1debeb6d9cce9..9fab15abe6404 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS Extensions FrontendHLSL FrontendOpenMP + HIPStdPar IPO IRPrinter IRReader diff --git 
a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 42777194cc76d..3682a2c6ae859 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2594,10 +2594,15 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, std::string MissingFeature; llvm::StringMap CallerFeatureMap; CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD); + // When compiling in HipStdPar mode we have to be conservative in rejecting + // target specific features in the FE, and defer the possible error to the + // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is + // referenced by an accelerator executable function, we emit an error. + bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice; if (BuiltinID) { StringRef FeatureList(CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); if (!Builtin::evaluateRequiredTargetFeatures( - FeatureList, CallerFeatureMap)) { + FeatureList, CallerFeatureMap) && !IsHipStdPar) { CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) << TargetDecl->getDeclName() << FeatureList; @@ -2630,7 +2635,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, return false; } return true; - })) + }) && !IsHipStdPar) CGM.getDiags().Report(Loc, diag::err_function_needs_feature) << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature; } else if (!FD->isMultiVersion() && FD->hasAttr()) { @@ -2639,7 +2644,8 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, for (const auto &F : CalleeFeatureMap) { if (F.getValue() && (!CallerFeatureMap.lookup(F.getKey()) || - !CallerFeatureMap.find(F.getKey())->getValue())) + !CallerFeatureMap.find(F.getKey())->getValue()) && + !IsHipStdPar) CGM.getDiags().Report(Loc, diag::err_function_needs_feature) << FD->getDeclName() << TargetDecl->getDeclName() << F.getKey(); } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 
754377bed7f7e..b1a6683a66bd0 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3526,7 +3526,7 @@ ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject( GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); Emitter.finalize(GV); - return ConstantAddress(GV, GV->getValueType(), Alignment); + return ConstantAddress(GV, GV->getValueType(), Alignment); } ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { @@ -3585,7 +3585,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { !Global->hasAttr() && !Global->hasAttr() && !Global->getType()->isCUDADeviceBuiltinSurfaceType() && - !Global->getType()->isCUDADeviceBuiltinTextureType()) + !Global->getType()->isCUDADeviceBuiltinTextureType() && + !(LangOpts.HIPStdPar && + isa(Global) && + !Global->hasAttr())) return; } else { // We need to emit host-side 'shadows' for all global diff --git a/clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp b/clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp new file mode 100644 index 0000000000000..1fa37ea6c342f --- /dev/null +++ b/clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -x hip -emit-llvm -fcuda-is-device \ +// RUN: -o - %s | FileCheck --check-prefix=NO-HIPSTDPAR-DEV %s + +// RUN: %clang_cc1 --hipstdpar -emit-llvm -fcuda-is-device \ +// RUN: -o - %s | FileCheck --check-prefix=HIPSTDPAR-DEV %s + +#define __device__ __attribute__((device)) + +// NO-HIPSTDPAR-DEV-NOT: define {{.*}} void @foo({{.*}}) +// HIPSTDPAR-DEV: define {{.*}} void @foo({{.*}}) +extern "C" void foo(float *a, float b) { + *a = b; +} + +// NO-HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}}) +// HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}}) +extern "C" __device__ void bar(float *a, float b) { + *a = b; +} diff --git a/clang/test/CodeGenHipStdPar/unsupported-ASM.cpp b/clang/test/CodeGenHipStdPar/unsupported-ASM.cpp new file mode 100644 index 
0000000000000..485bf916c899f --- /dev/null +++ b/clang/test/CodeGenHipStdPar/unsupported-ASM.cpp @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \ +// RUN: --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s + +#define __global__ __attribute__((global)) + +__global__ void foo(int i) { + asm ("addl %2, %1; seto %b0" : "=q" (i), "+g" (i) : "r" (i)); +} + +// CHECK: declare void @__ASM__hipstdpar_unsupported([{{.*}}]) diff --git a/clang/test/CodeGenHipStdPar/unsupported-builtins.cpp b/clang/test/CodeGenHipStdPar/unsupported-builtins.cpp new file mode 100644 index 0000000000000..02355eca2672e --- /dev/null +++ b/clang/test/CodeGenHipStdPar/unsupported-builtins.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \ +// RUN: --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s + +#define __global__ __attribute__((global)) + +__global__ void foo() { return __builtin_ia32_pause(); } + +// CHECK: declare void @__builtin_ia32_pause__hipstdpar_unsupported() From 63389326f529fd3e3019f8f8afae662e765a3b72 Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Tue, 17 Oct 2023 12:42:36 +0200 Subject: [PATCH 323/720] [mlir][nvvm] Support predicates in `BasicPtxBuilder` (#67102) This PR enhances `BasicPtxBuilder` to support predicates in PTX code generation. The `BasicPtxBuilder` interface was initially introduced for generating PTX code automatically for Ops that aren't supported by LLVM core. Predicates, which are typically not supported in LLVM core, are now supported using the same mechanism. In PTX programming, instructions can be guarded by predicates as shown below:. Here `@p` is a predicate register and guard the execution of the instruction. ``` @p ptx.code op1, op2, op3 ``` This PR introduces the `getPredicate` function in the `BasicPtxBuilder` interface to set an optional predicate. 
When a predicate is provided, the instruction is generated with predicate and guarded, otherwise, predicate is not generated. Note that the predicate value must always appear as the last argument on the Op definition. Additionally, this PR implements predicate usage for the following ops: - mbarrier.init - mbarrier.init.shared - mbarrier.arrive.expect_tx - mbarrier.arrive.expect_tx.shared - cp.async.bulk.tensor.shared.cluster.global - cp.async.bulk.tensor.global.shared.cta See for more detail in PTX programming model https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-instructions --- .../LLVMIR/BasicPtxBuilderInterface.td | 18 ++++ mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 93 ++++++++++++------- .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 14 +-- mlir/lib/Conversion/NVVMToLLVM/NVVMToLLVM.cpp | 1 + .../LLVMIR/IR/BasicPtxBuilderInterface.cpp | 8 ++ .../Conversion/NVVMToLLVM/nvvm-to-llvm.mlir | 90 +++++++++++------- 6 files changed, 154 insertions(+), 70 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td b/mlir/include/mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td index 6f27c8eb47175..df5a2448bd779 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td +++ b/mlir/include/mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td @@ -22,6 +22,8 @@ include "mlir/Dialect/LLVMIR/LLVMOpBase.td" // Basic PTX Builder Interface //===----------------------------------------------------------------------===// +def PtxPredicate : Optional; + def BasicPtxBuilderOpInterface : OpInterface<"BasicPtxBuilderInterface"> { let description = [{ This interface is used to generate inline assembly with PTX for basic @@ -62,6 +64,22 @@ def BasicPtxBuilderOpInterface : OpInterface<"BasicPtxBuilderInterface"> { }]; let cppNamespace = "::mlir::NVVM"; let methods = [ + InterfaceMethod< + /*desc=*/[{ + Optional function for setting a predicate, which + always returns a `PtxPredicate` value of type i1. 
If no predicate is + provided, the instruction is unguarded; otherwise, it's guarded by the + predicate value. The `PtxPredicate` value must always be the last argument. + The provided PTX code by `getPtx` should not include the predicate usage. + The interface automatically handles predicate usage in the generated + PTX code when necessary. + }], + /*retType=*/"std::optional<::mlir::Value>", + /*methodName=*/"getPredicate", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/"return {};" + >, InterfaceMethod< /*desc=*/[{ Returns PTX assembly with operand number. }], /*retType=*/"std::string", diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 0a5d1f274a315..d550fe1f33140 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -74,6 +74,12 @@ class NVVM_Op traits = []> : LLVM_OpBase { } +/// Base class that defines BasicPtxBuilderOpInterface. +class NVVM_PTXBuilder_Op traits = [DeclareOpInterfaceMethods]> : + LLVM_OpBase { +} + //===----------------------------------------------------------------------===// // NVVM attribute definitions //===----------------------------------------------------------------------===// @@ -206,21 +212,31 @@ def NVVM_ReduxOp : //===----------------------------------------------------------------------===// /// mbarrier.init instruction with generic pointer type -def NVVM_MBarrierInitOp : NVVM_Op<"mbarrier.init">, - Arguments<(ins LLVM_i64ptr_any:$addr, I32:$count)> { +def NVVM_MBarrierInitOp : NVVM_PTXBuilder_Op<"mbarrier.init">, + Arguments<(ins LLVM_i64ptr_any:$addr, I32:$count, PtxPredicate:$predicate)> { string llvmBuilder = [{ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_init, {$addr, $count}); }]; - let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands)"; + let assemblyFormat = "$addr `,` $count (`,` `predicate` `=` $predicate^)? 
attr-dict `:` type(operands)"; + let extraClassDeclaration = [{ + bool hasIntrinsic() { if(getPredicate()) return false; return true; } + }]; + let extraClassDefinition = [{ + std::string $cppClass::getPtx() { return std::string("mbarrier.init.b64 [%0], %1;"); } + }]; } /// mbarrier.init instruction with shared pointer type -def NVVM_MBarrierInitSharedOp : NVVM_Op<"mbarrier.init.shared">, - Arguments<(ins LLVM_i64ptr_shared:$addr, I32:$count)> { +def NVVM_MBarrierInitSharedOp : NVVM_PTXBuilder_Op<"mbarrier.init.shared">, + Arguments<(ins LLVM_i64ptr_shared:$addr, I32:$count, PtxPredicate:$predicate)> { string llvmBuilder = [{ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_init_shared, {$addr, $count}); }]; - let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands)"; + let assemblyFormat = "$addr `,` $count (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands)"; + let extraClassDeclaration = "bool hasIntrinsic() { return !getPredicate(); }"; + let extraClassDefinition = [{ + std::string $cppClass::getPtx() { return std::string("mbarrier.init.shared.b64 [%0], %1;"); } + }]; } def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">, @@ -275,26 +291,23 @@ def NVVM_MBarrierArriveNocompleteSharedOp : NVVM_Op<"mbarrier.arrive.nocomplete. let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)"; } -def NVVM_MBarrierArriveExpectTxOp : NVVM_Op<"mbarrier.arrive.expect_tx", - [DeclareOpInterfaceMethods]>, - Arguments<(ins LLVM_i64ptr_any:$addr, I32:$txcount)> { - let assemblyFormat = "$addr `,` $txcount attr-dict `:` type(operands)"; +def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx">, + Arguments<(ins LLVM_i64ptr_any:$addr, I32:$txcount, PtxPredicate:$predicate)> { + let assemblyFormat = "$addr `,` $txcount (`,` `predicate` `=` $predicate^)? 
attr-dict `:` type(operands)"; let extraClassDefinition = [{ std::string $cppClass::getPtx() { return std::string("mbarrier.arrive.expect_tx.b64 _, [%0], %1;"); } }]; } -def NVVM_MBarrierArriveExpectTxSharedOp : NVVM_Op<"mbarrier.arrive.expect_tx.shared", - [DeclareOpInterfaceMethods]>, - Arguments<(ins LLVM_i64ptr_shared:$addr, I32:$txcount)> { - let assemblyFormat = "$addr `,` $txcount attr-dict `:` type(operands)"; +def NVVM_MBarrierArriveExpectTxSharedOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx.shared">, + Arguments<(ins LLVM_i64ptr_shared:$addr, I32:$txcount, PtxPredicate:$predicate)> { + let assemblyFormat = "$addr `,` $txcount (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands)"; let extraClassDefinition = [{ std::string $cppClass::getPtx() { return std::string("mbarrier.arrive.expect_tx.shared.b64 _, [%0], %1;"); } }]; } -def NVVM_MBarrierTryWaitParityOp : NVVM_Op<"mbarrier.try_wait.parity", - [DeclareOpInterfaceMethods]>, +def NVVM_MBarrierTryWaitParityOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.parity">, Arguments<(ins LLVM_i64ptr_any:$addr, I32:$phase, I32:$ticks)> { let assemblyFormat = "$addr `,` $phase `,` $ticks attr-dict `:` type(operands)"; let extraClassDefinition = [{ @@ -313,8 +326,7 @@ def NVVM_MBarrierTryWaitParityOp : NVVM_Op<"mbarrier.try_wait.parity", }]; } -def NVVM_MBarrierTryWaitParitySharedOp : NVVM_Op<"mbarrier.try_wait.parity.shared", - [DeclareOpInterfaceMethods]>, +def NVVM_MBarrierTryWaitParitySharedOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.parity.shared">, Arguments<(ins LLVM_i64ptr_shared:$addr, I32:$phase, I32:$ticks)> { let assemblyFormat = "$addr `,` $phase `,` $ticks attr-dict `:` type(operands)"; let extraClassDefinition = [{ @@ -488,7 +500,7 @@ def LoadCacheModifierKind : I32EnumAttr<"LoadCacheModifierKind", def LoadCacheModifierAttr : EnumAttr; -def NVVM_CpAsyncOp : NVVM_Op<"cp.async.shared.global", [DeclareOpInterfaceMethods]>, +def NVVM_CpAsyncOp : NVVM_PTXBuilder_Op<"cp.async.shared.global">, 
Arguments<(ins LLVM_i8Ptr_shared:$dst, LLVM_i8Ptr_global:$src, I32Attr:$size, @@ -1359,12 +1371,24 @@ def NVVM_MmaOp : NVVM_Op<"mma.sync", [AttrSizedOperandSegments]> { // NVVM TMA Ops //===----------------------------------------------------------------------===// -def NVVM_CpAsyncBulkTensorGlobalToSharedClusterOp : NVVM_Op<"cp.async.bulk.tensor.shared.cluster.global", [DeclareOpInterfaceMethods]>, +def NVVM_CpAsyncBulkTensorGlobalToSharedClusterOp : + NVVM_Op<"cp.async.bulk.tensor.shared.cluster.global", + [DeclareOpInterfaceMethods, + AttrSizedOperandSegments]>, Arguments<(ins LLVM_i64ptr_shared:$dstMem, LLVM_i64ptr_any:$tmaDescriptor, LLVM_i64ptr_shared:$mbar, - Variadic:$coordinates)> { - let assemblyFormat = "$dstMem `,` $tmaDescriptor `,` $mbar `,` `box` `[`$coordinates `]` attr-dict `:` type(operands)"; + Variadic:$coordinates, + PtxPredicate:$predicate)> { + let assemblyFormat = [{ + $dstMem `,` + $tmaDescriptor `,` + $mbar `,` + `box` `[`$coordinates `]` + (`,` `predicate` `=` $predicate^)? + attr-dict `:` type(operands) + }]; + let extraClassDefinition = [{ std::string $cppClass::getPtx() { int dim = getCoordinates().size(); @@ -1382,11 +1406,21 @@ def NVVM_CpAsyncBulkTensorGlobalToSharedClusterOp : NVVM_Op<"cp.async.bulk.tenso let hasVerifier = 1; } -def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp : NVVM_Op<"cp.async.bulk.tensor.global.shared.cta", [DeclareOpInterfaceMethods]>, +def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp : + NVVM_Op<"cp.async.bulk.tensor.global.shared.cta", + [DeclareOpInterfaceMethods, + AttrSizedOperandSegments]>, Arguments<(ins LLVM_i64ptr_any:$tmaDescriptor, LLVM_i64ptr_shared:$srcMem, - Variadic:$coordinates)> { - let assemblyFormat = "$tmaDescriptor `,` $srcMem `,` `box` `[`$coordinates `]` attr-dict `:` type(operands)"; + Variadic:$coordinates, + PtxPredicate:$predicate)> { + let assemblyFormat = [{ + $tmaDescriptor `,` + $srcMem `,` + `box` `[`$coordinates `]` + (`,` `predicate` `=` $predicate^)? 
+ attr-dict `:` type(operands) + }]; let extraClassDefinition = [{ std::string $cppClass::getPtx() { int dim = getCoordinates().size(); @@ -1408,8 +1442,7 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp : NVVM_Op<"cp.async.bulk.tensor.gl // NVVM Wgmma Ops //===----------------------------------------------------------------------===// -def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned", - [DeclareOpInterfaceMethods]> { +def NVVM_WgmmaFenceAlignedOp : NVVM_PTXBuilder_Op<"wgmma.fence.aligned"> { let arguments = (ins); let description = [{ Enforce an ordering of register accesses between warpgroup level matrix @@ -1423,8 +1456,7 @@ def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned", }]; } -def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", - [DeclareOpInterfaceMethods]>, +def NVVM_WgmmaGroupSyncAlignedOp : NVVM_PTXBuilder_Op<"wgmma.commit.group.sync.aligned">, Arguments<(ins )> { let assemblyFormat = "attr-dict"; let description = [{ @@ -1437,8 +1469,7 @@ def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", }]; } -def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned", - [DeclareOpInterfaceMethods]>{ +def NVVM_WgmmaWaitGroupSyncOp : NVVM_PTXBuilder_Op<"wgmma.wait.group.sync.aligned">{ let arguments = (ins I32Attr:$group); let assemblyFormat = "attr-dict $group"; let description = [{ diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index 2d43230938526..00baf7b3c7415 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include #define DEBUG_TYPE "nvgpu-to-nvvm" #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ") @@ -830,9 +831,10 @@ struct NVGPUMBarrierInitLowering Value count = truncToI32(b, adaptor.getCount()); if 
(isMbarrierShared(mbarrierType)) { rewriter.replaceOpWithNewOp(op, barrier, - count); + count, Value()); } else { - rewriter.replaceOpWithNewOp(op, barrier, count); + rewriter.replaceOpWithNewOp(op, barrier, count, + Value()); } return success(); } @@ -927,12 +929,12 @@ struct NVGPUMBarrierArriveExpectTxLowering if (isMbarrierShared(op.getBarriers().getType())) { rewriter.replaceOpWithNewOp( - op, barrier, txcount); + op, barrier, txcount, Value()); return success(); } - rewriter.replaceOpWithNewOp(op, barrier, - txcount); + rewriter.replaceOpWithNewOp( + op, barrier, txcount, Value()); return success(); } }; @@ -983,7 +985,7 @@ struct NVGPUTmaAsyncLoadOpLowering } rewriter.replaceOpWithNewOp( - op, dest, adaptor.getTensorMapDescriptor(), barrier, coords); + op, dest, adaptor.getTensorMapDescriptor(), barrier, coords, Value()); return success(); } }; diff --git a/mlir/lib/Conversion/NVVMToLLVM/NVVMToLLVM.cpp b/mlir/lib/Conversion/NVVMToLLVM/NVVMToLLVM.cpp index fa518cf33428b..d1d68e3c9c518 100644 --- a/mlir/lib/Conversion/NVVMToLLVM/NVVMToLLVM.cpp +++ b/mlir/lib/Conversion/NVVMToLLVM/NVVMToLLVM.cpp @@ -41,6 +41,7 @@ using namespace mlir; using namespace NVVM; namespace { + struct PtxLowering : public OpInterfaceRewritePattern { using OpInterfaceRewritePattern< diff --git a/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp b/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp index 121504fc20c01..f3b674fdb5050 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp @@ -123,6 +123,14 @@ LLVM::InlineAsmOp PtxBuilder::build() { std::string ptxInstruction = interfaceOp.getPtx(); + // Add the predicate to the asm string. 
+ if (interfaceOp.getPredicate().has_value() && + interfaceOp.getPredicate().value()) { + std::string predicateStr = "@%"; + predicateStr += std::to_string((ptxOperands.size() - 1)); + ptxInstruction = predicateStr + " " + ptxInstruction; + } + // Tablegen doesn't accept $, so we use %, but inline assembly uses $. // Replace all % with $ std::replace(ptxInstruction.begin(), ptxInstruction.end(), '%', '$'); diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index c1549f9b9dba5..fcc882f562a4a 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -4,17 +4,30 @@ // and the generic `convert-to-llvm` pass. // RUN: mlir-opt --convert-to-llvm --split-input-file %s | FileCheck %s +// CHECK-LABEL: @init_mbarrier +llvm.func @init_mbarrier(%barrier_gen : !llvm.ptr, %barrier : !llvm.ptr<3>, %count : i32, %pred : i1) { + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.init.shared.b64 [$0], $1;", "r,r,b" + nvvm.mbarrier.init.shared %barrier, %count, predicate = %pred : !llvm.ptr<3>, i32, i1 + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.init.b64 [$0], $1;", "l,r,b" + nvvm.mbarrier.init %barrier_gen, %count, predicate = %pred : !llvm.ptr, i32, i1 + llvm.return +} + // CHECK-LABEL: @init_mbarrier_arrive_expect_tx -llvm.func @init_mbarrier_arrive_expect_tx(%barrier : !llvm.ptr<3>, %txcount : i32) { - //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "mbarrier.arrive.expect_tx.shared.b64 _, [$0], $1;", "r,r" +llvm.func @init_mbarrier_arrive_expect_tx(%barrier : !llvm.ptr<3>, %txcount : i32, %pred : i1) { + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "mbarrier.arrive.expect_tx.shared.b64 _, [$0], $1;", "r,r" nvvm.mbarrier.arrive.expect_tx.shared %barrier, %txcount : !llvm.ptr<3>, i32 + //CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$2 
mbarrier.arrive.expect_tx.shared.b64 _, [$0], $1;", "r,r,b " + nvvm.mbarrier.arrive.expect_tx.shared %barrier, %txcount, predicate = %pred : !llvm.ptr<3>, i32, i1 llvm.return } // CHECK-LABEL: @init_mbarrier_arrive_expect_tx_generic -llvm.func @init_mbarrier_arrive_expect_tx_generic(%barrier : !llvm.ptr, %txcount : i32) { +llvm.func @init_mbarrier_arrive_expect_tx_generic(%barrier : !llvm.ptr, %txcount : i32, %pred : i1) { // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "mbarrier.arrive.expect_tx.b64 _, [$0], $1;", "l,r" nvvm.mbarrier.arrive.expect_tx %barrier, %txcount : !llvm.ptr, i32 + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.arrive.expect_tx.b64 _, [$0], $1;", "l,r,b" + nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred : !llvm.ptr, i32, i1 llvm.return } @@ -73,82 +86,93 @@ func.func @async_cp_zfill(%dst: !llvm.ptr<3>, %src: !llvm.ptr<1>, %cpSize: i32) } // CHECK-LABEL: @tma_load_1d -func.func @tma_load_1d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier: !llvm.ptr<3>, %crd0: i32) { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att - // CHECK-SAME: "cp.async.bulk.tensor.1d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3} ], [$2];", "r,l,r,r" +func.func @tma_load_1d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier: !llvm.ptr<3>, %crd0: i32, %p : i1) { + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.1d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3} ], [$2];", "r,l,r,r" nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0] : !llvm.ptr<3>, !llvm.ptr, !llvm.ptr<3>, i32 + // CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$4 cp.async.bulk.tensor.1d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3}], [$2];", "l,r,r,r,b" + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0], predicate=%p : 
!llvm.ptr<3>, !llvm.ptr, !llvm.ptr<3>, i32,i1 return } // CHECK-LABEL: @tma_load_2d -func.func @tma_load_2d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32) { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att - // CHECK-SAME: "cp.async.bulk.tensor.2d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4} ], [$2];", "r,l,r,r,r" +func.func @tma_load_2d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %p : i1) { + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.2d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4} ], [$2];", "r,l,r,r,r" nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1] : !llvm.ptr<3>, !llvm.ptr, !llvm.ptr<3>, i32, i32 + // CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$5 cp.async.bulk.tensor.2d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4}], [$2];", "l,r,r,r,r,b" + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1], predicate=%p : !llvm.ptr<3>, !llvm.ptr, !llvm.ptr<3>, i32, i32, i1 return } // CHECK-LABEL: @tma_load_3d -func.func @tma_load_3d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32) { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att - // CHECK-SAME: "cp.async.bulk.tensor.3d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4, $5} ], [$2];", "r,l,r,r,r,r" +func.func @tma_load_3d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %p : i1) { + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.3d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4, $5} ], [$2];", "r,l,r,r,r,r" nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, 
%tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2] : !llvm.ptr<3>, !llvm.ptr, !llvm.ptr<3>, i32, i32, i32 + // CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$6 cp.async.bulk.tensor.3d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4, $5}], [$2];", "l,r,r,r,r,r,b" + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2], predicate=%p : !llvm.ptr<3>, !llvm.ptr, !llvm.ptr<3>, i32, i32, i32, i1 return } // CHECK-LABEL: @tma_load_4d -func.func @tma_load_4d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32) { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att - // CHECK-SAME: "cp.async.bulk.tensor.4d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4, $5, $6} ], [$2];", "r,l,r,r,r,r,r" +func.func @tma_load_4d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %p : i1) { + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.4d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4, $5, $6} ], [$2];", "r,l,r,r,r,r,r" nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2,%crd3] : !llvm.ptr<3>, !llvm.ptr, !llvm.ptr<3>, i32, i32, i32, i32 + // CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$7 cp.async.bulk.tensor.3d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4, $5}], [$2];", "l,r,r,r,r,r,b" + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2], predicate=%p : !llvm.ptr<3>, !llvm.ptr, !llvm.ptr<3>, i32, i32, i32, i1 return } // CHECK-LABEL: @tma_load_5d -func.func @tma_load_5d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %crd4: i32) { - // CHECK: llvm.inline_asm 
has_side_effects asm_dialect = att - // CHECK-SAME: "cp.async.bulk.tensor.5d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4, $5, $6, $7} ], [$2];", "r,l,r,r,r,r,r,r" +func.func @tma_load_5d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %crd4: i32, %p : i1) { + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.5d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4, $5, $6, $7} ], [$2];", "r,l,r,r,r,r,r,r" nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2,%crd3,%crd4] : !llvm.ptr<3>, !llvm.ptr, !llvm.ptr<3>, i32, i32, i32, i32, i32 + // CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$8 cp.async.bulk.tensor.5d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$3, $4, $5, $6, $7}], [$2];", "l,r,r,r,r,r,r,r,b" + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2,%crd3,%crd4], predicate=%p : !llvm.ptr<3>, !llvm.ptr, !llvm.ptr<3>, i32, i32, i32, i32, i32, i1 return } // CHECK-LABEL: @tma_store_1d -func.func @tma_store_1d(%tmaDescriptor: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32) { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att - // CHECK-SAME: "cp.async.bulk.tensor.1d.global.shared::cta.bulk_group [$0, {$2} ], [$1];", "l,r,r" +func.func @tma_store_1d(%tmaDescriptor: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %p : i1) { + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.1d.global.shared::cta.bulk_group [$0, {$2} ], [$1];", "l,r,r" nvvm.cp.async.bulk.tensor.global.shared.cta %tmaDescriptor, %src, box[%crd0] : !llvm.ptr, !llvm.ptr<3>, i32 + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$3 cp.async.bulk.tensor.1d.global.shared::cta.bulk_group [$0, {$2} ], [$1];", "l,r,r,b" + nvvm.cp.async.bulk.tensor.global.shared.cta 
%tmaDescriptor, %src, box[%crd0], predicate=%p : !llvm.ptr, !llvm.ptr<3>, i32, i1 return } // CHECK-LABEL: @tma_store_2d -func.func @tma_store_2d(%tmaDescriptor: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %crd1: i32) { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att - // CHECK-SAME: "cp.async.bulk.tensor.2d.global.shared::cta.bulk_group [$0, {$2, $3} ], [$1];", "l,r,r,r" +func.func @tma_store_2d(%tmaDescriptor: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %crd1: i32, %p : i1) { + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.2d.global.shared::cta.bulk_group [$0, {$2, $3} ], [$1];", "l,r,r,r" nvvm.cp.async.bulk.tensor.global.shared.cta %tmaDescriptor, %src, box[%crd0,%crd1] : !llvm.ptr, !llvm.ptr<3>, i32, i32 + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$4 cp.async.bulk.tensor.2d.global.shared::cta.bulk_group [$0, {$2, $3} ], [$1];", "l,r,r,r,b" + nvvm.cp.async.bulk.tensor.global.shared.cta %tmaDescriptor, %src, box[%crd0,%crd1], predicate=%p : !llvm.ptr, !llvm.ptr<3>, i32, i32, i1 return } // CHECK-LABEL: @tma_store_3d -func.func @tma_store_3d(%tmaDescriptor: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32) { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att - // CHECK-SAME: "cp.async.bulk.tensor.3d.global.shared::cta.bulk_group [$0, {$2, $3, $4} ], [$1];", "l,r,r,r,r" +func.func @tma_store_3d(%tmaDescriptor: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %p : i1) { + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.3d.global.shared::cta.bulk_group [$0, {$2, $3, $4} ], [$1];", "l,r,r,r,r" nvvm.cp.async.bulk.tensor.global.shared.cta %tmaDescriptor, %src, box[%crd0,%crd1,%crd2] : !llvm.ptr, !llvm.ptr<3>, i32, i32, i32 + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$5 cp.async.bulk.tensor.3d.global.shared::cta.bulk_group [$0, {$2, $3, $4} ], [$1];", "l,r,r,r,r,b" + 
nvvm.cp.async.bulk.tensor.global.shared.cta %tmaDescriptor, %src, box[%crd0,%crd1,%crd2], predicate=%p : !llvm.ptr, !llvm.ptr<3>, i32, i32, i32, i1 return } // CHECK-LABEL: @tma_store_4d -func.func @tma_store_4d(%tmaDescriptor: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32) { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att - // CHECK-SAME: "cp.async.bulk.tensor.4d.global.shared::cta.bulk_group [$0, {$2, $3, $4, $5} ], [$1];", "l,r,r,r,r,r" +func.func @tma_store_4d(%tmaDescriptor: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %p : i1) { + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.4d.global.shared::cta.bulk_group [$0, {$2, $3, $4, $5} ], [$1];", "l,r,r,r,r,r" nvvm.cp.async.bulk.tensor.global.shared.cta %tmaDescriptor, %src, box[%crd0,%crd1,%crd2,%crd3] : !llvm.ptr, !llvm.ptr<3>, i32, i32, i32, i32 + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$6 cp.async.bulk.tensor.4d.global.shared::cta.bulk_group [$0, {$2, $3, $4, $5} ], [$1];", "l,r,r,r,r,r,b" + nvvm.cp.async.bulk.tensor.global.shared.cta %tmaDescriptor, %src, box[%crd0,%crd1,%crd2,%crd3], predicate=%p : !llvm.ptr, !llvm.ptr<3>, i32, i32, i32, i32, i1 return } // CHECK-LABEL: @tma_store_5d -func.func @tma_store_5d(%tmaDescriptor: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %crd4: i32) { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att - // CHECK-SAME: "cp.async.bulk.tensor.5d.global.shared::cta.bulk_group [$0, {$2, $3, $4, $5, $6} ], [$1];", "l,r,r,r,r,r,r" +func.func @tma_store_5d(%tmaDescriptor: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %crd4: i32, %p : i1) { + // CHECK-NEXT: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.5d.global.shared::cta.bulk_group [$0, {$2, $3, $4, $5, $6} ], [$1];", "l,r,r,r,r,r,r" nvvm.cp.async.bulk.tensor.global.shared.cta 
%tmaDescriptor, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4] : !llvm.ptr, !llvm.ptr<3>, i32, i32, i32, i32, i32 + + // CHECK-NEXT: llvm.inline_asm has_side_effects asm_dialect = att "@$7 cp.async.bulk.tensor.5d.global.shared::cta.bulk_group [$0, {$2, $3, $4, $5, $6} ], [$1];", "l,r,r,r,r,r,r,b" + nvvm.cp.async.bulk.tensor.global.shared.cta %tmaDescriptor, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4], predicate=%p : !llvm.ptr, !llvm.ptr<3>, i32, i32, i32, i32, i32, i1 return } From c4ba84d6555148fb7469fd44412a49d9d66eb4cf Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Tue, 17 Oct 2023 12:46:10 +0200 Subject: [PATCH 324/720] [mlir][nvgpu] Fix packing accumlator matrix (#69316) The #68728 significantly simplified the accumulator matrix type, making it easier to work with the nvgpu dialect without worrying about the number of required structs, as this information is abstracted away in the nvgpu-to-nvvm transformation. However, we forgot packing the structs after initialization, causing the accumulator matrix to hold undefined values, which is wrong. This PR addresses that. 
--- .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index 00baf7b3c7415..029659a2f8554 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -1578,27 +1578,34 @@ struct NVGPUWarpgroupMmaInitAccumulatorOpLowering matchAndRewrite(nvgpu::WarpgroupMmaInitAccumulatorOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { ImplicitLocOpBuilder b(op->getLoc(), rewriter); - LLVM::LLVMStructType structType = + LLVM::LLVMStructType packStructType = getTypeConverter() ->convertType(op.getMatrixC().getType()) .cast(); - Type elemType = structType.getBody() + Type elemType = packStructType.getBody() .front() .cast() .getBody() .front(); Value zero = b.create(elemType, b.getZeroAttr(elemType)); - Value structValue = b.create(structType); - for (auto [idx, s] : llvm::enumerate(structType.getBody())) { - auto innerStructType = s.cast(); - int ii = idx; - Value innerStructValue = b.create(structValue, ii); - for (unsigned i = 0; i < innerStructType.getBody().size(); ++i) { - innerStructValue = b.create( - innerStructType, innerStructValue, zero, ArrayRef({i})); + Value packStruct = b.create(packStructType); + SmallVector innerStructs; + // Unpack the structs and set all values to zero + for (auto [idx, s] : llvm::enumerate(packStructType.getBody())) { + auto structType = s.cast(); + Value structValue = b.create(packStruct, idx); + for (unsigned i = 0; i < structType.getBody().size(); ++i) { + structValue = b.create( + structType, structValue, zero, ArrayRef({i})); } + innerStructs.push_back(structValue); } - rewriter.replaceOp(op, structValue); + // Pack the inner structs into a single struct + for (auto [idx, matrix] : llvm::enumerate(innerStructs)) { + packStruct = b.create(packStruct.getType(), + packStruct, matrix, 
idx); + } + rewriter.replaceOp(op, packStruct); return success(); } }; From f2898def693a8ba8a017fcceab4260d7fe2faeb1 Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 17 Oct 2023 18:47:49 +0800 Subject: [PATCH 325/720] [InstCombine] Don't mix X << Y / Z << Y with X << Y / X << Z (#69302) Fixes #69291. This patch improve the logic handling different patterns to avoid mixing these pattern. --- .../InstCombine/InstCombineMulDivRem.cpp | 24 +++++++------------ llvm/test/Transforms/InstCombine/div-shift.ll | 14 +++++++++++ 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 26e0a6700042e..518f8aa51c0cd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -923,8 +923,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient, return Remainder.isMinValue(); } -static Instruction *foldIDivShl(BinaryOperator &I, - InstCombiner::BuilderTy &Builder) { +static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) { assert((I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::UDiv) && "Expected integer divide"); @@ -933,7 +932,6 @@ static Instruction *foldIDivShl(BinaryOperator &I, Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Type *Ty = I.getType(); - Instruction *Ret = nullptr; Value *X, *Y, *Z; // With appropriate no-wrap constraints, remove a common factor in the @@ -948,12 +946,12 @@ static Instruction *foldIDivShl(BinaryOperator &I, // (X * Y) u/ (X << Z) --> Y u>> Z if (!IsSigned && HasNUW) - Ret = BinaryOperator::CreateLShr(Y, Z); + return Builder.CreateLShr(Y, Z, "", I.isExact()); // (X * Y) s/ (X << Z) --> Y s/ (1 << Z) if (IsSigned && HasNSW && (Op0->hasOneUse() || Op1->hasOneUse())) { Value *Shl = Builder.CreateShl(ConstantInt::get(Ty, 1), Z); - Ret = BinaryOperator::CreateSDiv(Y, Shl); + return 
Builder.CreateSDiv(Y, Shl, "", I.isExact()); } } @@ -971,13 +969,13 @@ static Instruction *foldIDivShl(BinaryOperator &I, ((Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap()) || (Shl0->hasNoUnsignedWrap() && Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap()))) - Ret = BinaryOperator::CreateUDiv(X, Y); + return Builder.CreateUDiv(X, Y, "", I.isExact()); // For signed div, we need 'nsw' on both shifts + 'nuw' on the divisor. // (X << Z) / (Y << Z) --> X / Y if (IsSigned && Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap() && Shl1->hasNoUnsignedWrap()) - Ret = BinaryOperator::CreateSDiv(X, Y); + return Builder.CreateSDiv(X, Y, "", I.isExact()); } // If X << Y and X << Z does not overflow, then: @@ -998,15 +996,11 @@ static Instruction *foldIDivShl(BinaryOperator &I, /*HasNSW*/ IsSigned ? (Shl0->hasNoUnsignedWrap() || Shl1->hasNoUnsignedWrap()) : Shl0->hasNoSignedWrap()); - Ret = BinaryOperator::CreateLShr(Dividend, Z); + return Builder.CreateLShr(Dividend, Z, "", I.isExact()); } } - if (!Ret) - return nullptr; - - Ret->setIsExact(I.isExact()); - return Ret; + return nullptr; } /// This function implements the transforms common to both integer division @@ -1183,8 +1177,8 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) { return NewDiv; } - if (Instruction *R = foldIDivShl(I, Builder)) - return R; + if (Value *R = foldIDivShl(I, Builder)) + return replaceInstUsesWith(I, R); // With the appropriate no-wrap constraint, remove a multiply by the divisor // after peeking through another divide: diff --git a/llvm/test/Transforms/InstCombine/div-shift.ll b/llvm/test/Transforms/InstCombine/div-shift.ll index 635c01d84441d..d208837f04594 100644 --- a/llvm/test/Transforms/InstCombine/div-shift.ll +++ b/llvm/test/Transforms/InstCombine/div-shift.ll @@ -1280,3 +1280,17 @@ entry: %div = sdiv i32 %lhs, %rhs ret i32 %div } + +@a = external global i32 +define i32 @pr69291() { +; CHECK-LABEL: @pr69291( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 1 +; 
+entry: + %conv = load i32, ptr @a, align 1 + %add = shl nuw nsw i32 %conv, 1 + %add2 = shl nuw nsw i32 %conv, 1 + %div = sdiv i32 %add, %add2 + ret i32 %div +} From 39cdefb5b52b3786993bca243d589de19896fca1 Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Tue, 17 Oct 2023 13:03:37 +0200 Subject: [PATCH 326/720] [mlir][nvvm] Add prefetch.tensormap (#67564) This PR adds `prefetch.tensormap` Op. It brings the cache line containing the given tma descriptor for subsequent use by the cp.async.bulk.tensor instruction. https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-prefetch-prefetchu --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 11 +++++++++++ mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td | 12 ++++++++++++ mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 13 +++++++++++++ mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 11 +++++++++++ mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir | 11 +++++++++++ 5 files changed, 58 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index d550fe1f33140..cefdd7cc4033a 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -1438,6 +1438,17 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp : let hasVerifier = 1; } +def NVVM_PrefetchTensorMapOp : NVVM_Op<"prefetch.tensormap", + [DeclareOpInterfaceMethods]>, + Arguments<(ins LLVM_i64ptr_any:$tmaDescriptor, PtxPredicate:$predicate)> { + let assemblyFormat = "$tmaDescriptor (`,` `predicate` `=` $predicate^)? 
attr-dict `:` type(operands)"; + let extraClassDefinition = [{ + std::string $cppClass::getPtx() { + return std::string("prefetch.tensormap [%0];"); + } + }]; +} + //===----------------------------------------------------------------------===// // NVVM Wgmma Ops //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td index fd16376be3669..dd00355b6d77e 100644 --- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td +++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td @@ -619,6 +619,18 @@ def NVGPU_MBarrierTryWaitParityOp : NVGPU_Op<"mbarrier.try_wait.parity", []> { let assemblyFormat = "$barriers `[` $mbarId `]` `,` $phase `,` $ticks attr-dict `:` type($barriers)"; } +def NVGPU_TmaPrefetchOp : NVGPU_Op<"tma.prefetch.descriptor", []> { + let summary = "Prefetch given `nvgpu.tensormap.descriptor` "; + let description = [{ + The Op brings the cache line containing the given `$tmaDescriptor` for + subsequent use by the `tma.async.load` instruction. + }]; + let arguments = (ins NVGPU_TensorMapDescriptor:$tensorMapDescriptor, Optional:$predicate); + let assemblyFormat = [{ + $tensorMapDescriptor (`,` $predicate^)? 
attr-dict `:` type($tensorMapDescriptor) + }]; +} + def NVGPU_TmaAsyncLoadOp : NVGPU_Op<"tma.async.load", []> { let summary = "TMA asynchronous load"; let description = [{ diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index 029659a2f8554..7eb6f42d2788e 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -1610,6 +1610,18 @@ struct NVGPUWarpgroupMmaInitAccumulatorOpLowering } }; +struct NVGPUTmaPrefetchOpLowering + : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + LogicalResult + matchAndRewrite(nvgpu::TmaPrefetchOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp( + op, adaptor.getTensorMapDescriptor(), adaptor.getPredicate()); + return success(); + } +}; + } // namespace void mlir::populateNVGPUToNVVMConversionPatterns(LLVMTypeConverter &converter, @@ -1623,6 +1635,7 @@ void mlir::populateNVGPUToNVVMConversionPatterns(LLVMTypeConverter &converter, NVGPUMBarrierTryWaitParityLowering, // nvgpu.mbarrier.try_wait_parity NVGPUTmaAsyncLoadOpLowering, // nvgpu.tma.async.load NVGPUTmaCreateDescriptorOpLowering, // nvgpu.tma.create.descriptor + NVGPUTmaPrefetchOpLowering, // nvgpu.tma.prefetch.descriptor NVGPUMBarrierArriveExpectTxLowering, // nvgpu.mbarrier.arrive.expect_tx NVGPUGenerateWarpgroupDescriptorLowering, // nvgpu.warpgroup.generate.descriptor NVGPUWarpgroupMmaOpLowering, // nvgpu.warpgroup.mma diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index bf660e2683158..8971585e03c7a 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -643,6 +643,17 @@ func.func @create_tensor_map(%devicePtr2d : memref<64x128xf32>, %devicePtr1d : m func.return } +// CHECK-LABEL: @tma_prefetch( +// CHECK-SAME: 
%[[arg0:[a-zA-Z0-9_]+]]: !nvgpu.tensormap.descriptor, swizzle = none, l2promo = none, oob = nan, interleave = none>, %[[arg1:[a-zA-Z0-9_]+]]: i1 +func.func @tma_prefetch(%tensorMap1d: !tensorMap1d, %p : i1) { + // CHECK: %[[S0:.+]] = builtin.unrealized_conversion_cast %[[arg0]] : !nvgpu.tensormap.descriptor, swizzle = none, l2promo = none, oob = nan, interleave = none> to !llvm.ptr + // CHECK: nvvm.prefetch.tensormap %[[S0]] : !llvm.ptr + nvgpu.tma.prefetch.descriptor %tensorMap1d: !tensorMap1d + // CHECK: nvvm.prefetch.tensormap %[[S0]], predicate = %[[arg1]] : !llvm.ptr, i1 + nvgpu.tma.prefetch.descriptor %tensorMap1d, %p: !tensorMap1d + func.return +} + !lhsTensorMap = !nvgpu.tensormap.descriptor, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none> !rhsTensorMap = !nvgpu.tensormap.descriptor, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none> diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index fcc882f562a4a..0d0ac9637438a 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -504,3 +504,14 @@ func.func @elect_one_leader_sync() { %cnd = nvvm.elect.sync -> i1 return } + +// ----- + +// CHECK-LABEL: @init_mbarrier_arrive_expect_tx +llvm.func @init_mbarrier_arrive_expect_tx(%desc : !llvm.ptr, %pred : i1) { + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "prefetch.tensormap [$0];", "l" + nvvm.prefetch.tensormap %desc : !llvm.ptr + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$1 prefetch.tensormap [$0];", "l,b" + nvvm.prefetch.tensormap %desc, predicate = %pred : !llvm.ptr, i1 + llvm.return +} From b736e0466c6291cf742055fd6fef5b29168a5cdf Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Tue, 17 Oct 2023 13:03:54 +0200 Subject: [PATCH 327/720] [MLIR][NVGPU] Test warpgroup matrix multiply 128x128x64 (#68817) Add a test that performs warpgroup matrix multiply 
128x128x64. The test uses three Ops to do that. --- .../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 8971585e03c7a..a344578def39e 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -983,6 +983,73 @@ func.func @warpgroup_mma_init() { return } +// CHECK-LABEL: @warpgroup_matrix_multiply_m128n128k64( +// CHECK-SAME: %[[arg0:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.descriptor>, %[[arg1:[a-zA-Z0-9_]+]]: !nvgpu.warpgroup.descriptor>, %[[arg2:[a-zA-Z0-9_]+]]: memref<128x128xf32, 3>) +func.func @warpgroup_matrix_multiply_m128n128k64( + %descA: !nvgpu.warpgroup.descriptor>, + %descB: !nvgpu.warpgroup.descriptor>, + %shmemD: memref<128x128xf32, 3>) +{ + // Init + %matrixC = nvgpu.warpgroup.mma.init.accumulator -> + !nvgpu.warpgroup.accumulator> + + // GEMM + %matrixD = nvgpu.warpgroup.mma %descA, %descB, %matrixC {transposeB}: + !nvgpu.warpgroup.descriptor>, + !nvgpu.warpgroup.descriptor>, + !nvgpu.warpgroup.accumulator> + -> + !nvgpu.warpgroup.accumulator> + + + // Epilogue + nvgpu.warpgroup.mma.store %matrixD, %shmemD : + !nvgpu.warpgroup.accumulator< fragmented = vector<128x128xf32>> + to memref<128x128xf32,3> + + +// CHECK: %[[S0:.+]] = builtin.unrealized_conversion_cast %[[arg0]] : !nvgpu.warpgroup.descriptor> to i64 +// CHECK: %[[S1:.+]] = builtin.unrealized_conversion_cast %[[arg1]] : !nvgpu.warpgroup.descriptor> to i64 +// CHECK: %[[S2:.+]] = builtin.unrealized_conversion_cast %[[arg2]] : memref<128x128xf32, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S3:.+]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 +// CHECK: %[[S4:.+]] = llvm.mlir.undef : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S5:.+]] = llvm.extractvalue %[[S4]][0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S3]], %[[S5]][0] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: %[[S68:.+]] = llvm.insertvalue %[[S3]], %{{.*}}[63] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: %[[S69:.+]] = llvm.extractvalue %[[S4]][1] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S71:.+]] = llvm.insertvalue %[[S3]], %[[S69]][0] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: %[[S134:.+]] = llvm.insertvalue %[[S3]], %{{.*}}[63] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: %[[S135:.+]] = llvm.insertvalue %[[S68]], %[[S4]][0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S136:.+]] = llvm.insertvalue %[[S134]], %[[S135]][1] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: nvvm.wgmma.fence.aligned +// CHECK: %[[S137:.+]] = llvm.mlir.undef : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S138:.+]] = llvm.extractvalue %136[0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S139:.+]] = nvvm.wgmma.mma_async %0, %1, , D[%[[S138]], , ], A[, , ], B[, , ] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: nvvm.wgmma.mma_async +// CHECK: nvvm.wgmma.mma_async +// CHECK: %[[S154:.+]] = nvvm.wgmma.mma_async +// CHECK: nvvm.wgmma.mma_async +// CHECK: nvvm.wgmma.mma_async +// CHECK: nvvm.wgmma.mma_async +// CHECK: %[[S173:.+]] = nvvm.wgmma.mma_async +// CHECK: %[[S174:.+]] = llvm.insertvalue %[[S154]], %[[S137]][0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S175:.+]] = llvm.insertvalue %[[S173]], %[[S174]][1] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: nvvm.wgmma.commit.group.sync.aligned +// CHECK: nvvm.wgmma.wait.group.sync.aligned 1 +// CHECK: %[[S176:.+]] = llvm.extractvalue %[[S175]][0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S193:.+]] = llvm.extractvalue %[[S176]][0] : !llvm.struct<(f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: %[[S194:.+]] = llvm.extractvalue %[[S176]][1] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: memref.store %[[S193]], %[[arg2]][%{{.*}}, %{{.*}}] : memref<128x128xf32, 3> +// CHECK: memref.store %[[S194]], %[[arg2]][%{{.*}}, %{{.*}}] : memref<128x128xf32, 3> +// CHECK: %[[S485:.+]] = llvm.extractvalue %[[S175]][1] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S503:.+]] = llvm.extractvalue %[[S485]][0] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32)> +// CHECK: %[[S504:.+]] = llvm.extractvalue %[[S485]][1] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: memref.store %[[S503]], %[[arg2]][%{{.*}}, %{{.*}}] : memref<128x128xf32, 3> +// CHECK: memref.store %[[S504]], %[[arg2]][%{{.*}}, %{{.*}}] : memref<128x128xf32, 3> + return +} + transform.sequence failures(propagate) { ^bb1(%arg1: !transform.any_op): From 7f2dd2da99371ef5b281834b604d251f3112cb23 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 17 Oct 2023 11:23:47 +0000 Subject: [PATCH 328/720] [mlir][Tosa] Fix test failure when running with Asan. We cannot rely on the address of StringAttr being the same if the stored string is the same. --- mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp index d686ce125c135..d973ac9cae2e8 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp @@ -410,7 +410,7 @@ struct TosaValidation : public tosa::impl::TosaValidationBase { SmallVector> const_checkers; tosa_level_t tosa_level; - DenseMap variables_map; + DenseMap variables_map; }; LogicalResult TosaValidation::applyLevelCheck(Operation *op) { @@ -448,7 +448,7 @@ bool TosaValidation::CheckVariable(Operation *op) { if (isa(op)) { auto name_attr = cast(op->getAttr("name")); - if (variables_map.count(&name_attr)) { + if (variables_map.count(name_attr)) { op->emitOpError() << "name has already been declared"; return false; } @@ -456,7 +456,7 @@ bool TosaValidation::CheckVariable(Operation *op) { auto type_attr = 
cast(op->getAttr("type")); mlir::Type type = type_attr.getValue(); - variables_map[&name_attr] = type; + variables_map[name_attr] = type; } return true; @@ -467,12 +467,12 @@ bool TosaValidation::CheckVariableReadOrWrite(Operation *op) { isa(op)) { auto name_attr = cast(op->getAttr("name")); - if (!variables_map.count(&name_attr)) { + if (!variables_map.count(name_attr)) { op->emitOpError() << "name has not been declared"; return false; } - auto var_type = variables_map[&name_attr]; + auto var_type = variables_map[name_attr]; for (auto v : op->getOperands()) { auto type = v.getType(); From e730f4a27fceb199d9dcc517644c2e07c3fd5403 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 17 Oct 2023 07:52:08 -0400 Subject: [PATCH 329/720] [gn] port 3694697003bb --- llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 303a6c29d7b91..3a19729bb8dcf 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -271,6 +271,7 @@ static_library("builtins") { sources += [ "cpu_model.c", "divxc3.c", + "extendxftf2.c", "fixunsxfdi.c", "fixunsxfsi.c", "fixunsxfti.c", @@ -285,6 +286,7 @@ static_library("builtins") { "powixf2.c", "truncdfbf2.c", "truncsfbf2.c", + "trunctfxf2.c", ] } if (current_cpu == "x86") { From 8b5625cb42b789d5c29863d9aaf85aad83bb29a2 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 17 Oct 2023 07:52:53 -0400 Subject: [PATCH 330/720] [gn] port 3694697003bb --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index c227d81162838..a70ff97299aa0 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ 
b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -264,6 +264,7 @@ copy("Headers") { "stdarg.h", "stdatomic.h", "stdbool.h", + "stdckdint.h", "stddef.h", "stdint.h", "stdnoreturn.h", From 4434253f0fa663f5da4f460c798d1666da8868c7 Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Tue, 17 Oct 2023 11:54:00 +0000 Subject: [PATCH 331/720] [Bazel] disable preload-library.mlir test --- utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel index 08e9c34a5e3aa..5579f9a58d615 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel @@ -35,7 +35,8 @@ package(default_visibility = ["//visibility:public"]) "Transform/*-symbol-decl-and-schedule.mlir", "Transform/*-symbol-decl-dir.mlir", "Transform/*-symbol-decl-invalid.mlir", - "Transform/Library/*.mlir", + "Transform/Library/*.mlir" + "Transform/preload-library.mlir", "Transform/test-interpreter-library/*.mlir", "Transform/test-repro-dump.mlir", ], From 90576084c1d797f845055e8d95c2d9f455268841 Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Tue, 17 Oct 2023 11:58:43 +0000 Subject: [PATCH 332/720] [Bazel] fix typo --- utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel index 5579f9a58d615..e5b877a48d5e8 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel @@ -35,7 +35,7 @@ package(default_visibility = ["//visibility:public"]) "Transform/*-symbol-decl-and-schedule.mlir", "Transform/*-symbol-decl-dir.mlir", 
"Transform/*-symbol-decl-invalid.mlir", - "Transform/Library/*.mlir" + "Transform/Library/*.mlir", "Transform/preload-library.mlir", "Transform/test-interpreter-library/*.mlir", "Transform/test-repro-dump.mlir", From 12bf4231eb0d4685b9d8152352fbd15ac9fb528b Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Tue, 17 Oct 2023 12:36:11 +0000 Subject: [PATCH 333/720] [Bazel] Fix dependencies for clang codegen --- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 73c2c95f4c611..2f3fdd39050f9 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -1860,6 +1860,7 @@ cc_library( "//llvm:IRReader", "//llvm:InstCombine", "//llvm:Instrumentation", + "//llvm:HipStdPar", "//llvm:LTO", "//llvm:Linker", "//llvm:MC", From 509b5708e98e01a038534f30523a4e12bc98c7aa Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Tue, 17 Oct 2023 15:59:42 +0300 Subject: [PATCH 334/720] [AMDGPU][AsmParser] Eliminate custom predicates for named-bit operands. (#69243) isGDS() and isTFE() need special treatment, because they may be both named-bit and token operands. Part of #62629. 
--- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 10 ++++++---- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 11 ++--------- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 ++ 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 81fc28d293021..23242ad84b0c4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -125,11 +125,11 @@ def InstFlag : OperandWithDefaultOps ; def i1imm_0 : OperandWithDefaultOps; -class CustomOperandClass +class CustomOperandClass : AsmOperandClass { let Name = name; - let PredicateMethod = "is"#name; + let PredicateMethod = predicateMethod; let ParserMethod = parserMethod; let RenderMethod = "addImmOperands"; let IsOptional = optional; @@ -138,6 +138,7 @@ class CustomOperandClass { string ImmTy = "ImmTy"#name; + string PredicateMethod = "is"#name; string ParserMethod = "parse"#name; string DefaultValue = "0"; string DefaultMethod = "[this]() { return "# @@ -145,7 +146,8 @@ class CustomOperandProps { "AMDGPUOperand::"#ImmTy#"); }"; string PrintMethod = "print"#name; AsmOperandClass ParserMatchClass = - CustomOperandClass; + CustomOperandClass; string OperandType = "OPERAND_IMMEDIATE"; } diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index fa651b9fcb05a..faeaa94f97335 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -356,25 +356,20 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isImm() && Imm.Type == ImmT; } + template bool isImmTy() const { return isImmTy(Ty); } + bool isImmLiteral() const { return isImmTy(ImmTyNone); } bool isImmModifier() const { return isImm() && Imm.Type != ImmTyNone; } - bool isClampSI() const { return isImmTy(ImmTyClampSI); } bool isOModSI() const { return isImmTy(ImmTyOModSI); } bool 
isDMask() const { return isImmTy(ImmTyDMask); } bool isDim() const { return isImmTy(ImmTyDim); } - bool isUNorm() const { return isImmTy(ImmTyUNorm); } - bool isDA() const { return isImmTy(ImmTyDA); } bool isR128A16() const { return isImmTy(ImmTyR128A16); } - bool isA16() const { return isImmTy(ImmTyA16); } - bool isLWE() const { return isImmTy(ImmTyLWE); } bool isOff() const { return isImmTy(ImmTyOff); } bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } - bool isExpVM() const { return isImmTy(ImmTyExpVM); } - bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } bool isOffen() const { return isImmTy(ImmTyOffen); } bool isIdxen() const { return isImmTy(ImmTyIdxen); } bool isAddr64() const { return isImmTy(ImmTyAddr64); } @@ -387,7 +382,6 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isLDS() const { return isImmTy(ImmTyLDS); } bool isCPol() const { return isImmTy(ImmTyCPol); } bool isTFE() const { return isImmTy(ImmTyTFE); } - bool isD16() const { return isImmTy(ImmTyD16); } bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); } bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); } @@ -404,7 +398,6 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } bool isNegLo() const { return isImmTy(ImmTyNegLo); } bool isNegHi() const { return isImmTy(ImmTyNegHi); } - bool isHigh() const { return isImmTy(ImmTyHigh); } bool isRegOrImm() const { return isReg() || isImm(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f09ca954904fc..b4adb444600c4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1045,6 +1045,7 @@ class NamedIntOperand : CustomOperand { + let PredicateMethod = "isImmTy"; let ParserMethod = "[this](OperandVector &Operands) -> ParseStatus { "# "return parseNamedBit(\""#Id#"\", Operands, 
AMDGPUOperand::"#ImmTy#"); }"; @@ -1056,6 +1057,7 @@ class NamedBitOperand class DefaultOperand : OperandWithDefaultOps, CustomOperandProps<1, Op.ParserMatchClass.Name> { + let PredicateMethod = Op.ParserMatchClass.PredicateMethod; let ParserMethod = Op.ParserMatchClass.ParserMethod; let PrintMethod = Op.PrintMethod; } From bff17f9f23ce72ca603b6a3d31a9bf97e3b1bc75 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 17 Oct 2023 14:47:32 +0100 Subject: [PATCH 335/720] [AMDGPU] Remove support for no-return buffer atomic intrinsics. NFC. (#69326) Thsi removes some of the machinery added by D85268, which was unused since D87719 changed all buffer atomic intrinsics to return a value. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 24 +++++++-------- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 29 ++++++------------- llvm/lib/Target/AMDGPU/SIInstructions.td | 4 +-- 3 files changed, 23 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 0df66e1ffc519..5f1d1d932f74c 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1174,9 +1174,9 @@ class AMDGPUStructPtrBufferStore : DefaultAttrsI def int_amdgcn_struct_ptr_buffer_store_format : AMDGPUStructPtrBufferStore; def int_amdgcn_struct_ptr_buffer_store : AMDGPUStructPtrBufferStore; -class AMDGPURawBufferAtomic : Intrinsic < - !if(NoRtn, [], [data_ty]), - [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR) +class AMDGPURawBufferAtomic : Intrinsic < + [data_ty], + [LLVMMatchType<0>, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) @@ -1208,9 +1208,9 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic< [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; -class AMDGPURawPtrBufferAtomic : 
Intrinsic < - !if(NoRtn, [], [data_ty]), - [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR) +class AMDGPURawPtrBufferAtomic : Intrinsic < + [data_ty], + [LLVMMatchType<0>, // vdata(VGPR) AMDGPUBufferRsrcTy, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) @@ -1249,9 +1249,9 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic< def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic; def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic; -class AMDGPUStructBufferAtomic : Intrinsic < - !if(NoRtn, [], [data_ty]), - [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR) +class AMDGPUStructBufferAtomic : Intrinsic < + [data_ty], + [LLVMMatchType<0>, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) @@ -1283,9 +1283,9 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic< [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; -class AMDGPUStructPtrBufferAtomic : Intrinsic < - !if(NoRtn, [], [data_ty]), - [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR) +class AMDGPUStructPtrBufferAtomic : Intrinsic < + [data_ty], + [LLVMMatchType<0>, // vdata(VGPR) AMDGPUBufferRsrcTy, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index d6717c998bec8..21abfb42d11ba 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5879,31 +5879,23 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI, IID == Intrinsic::amdgcn_struct_buffer_atomic_cmpswap || IID == Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap || IID == 
Intrinsic::amdgcn_struct_ptr_buffer_atomic_cmpswap; - const bool HasReturn = MI.getNumExplicitDefs() != 0; - - Register Dst; - - int OpOffset = 0; - if (HasReturn) { - // A few FP atomics do not support return values. - Dst = MI.getOperand(0).getReg(); - } else { - OpOffset = -1; - } + Register Dst = MI.getOperand(0).getReg(); // Since we don't have 128-bit atomics, we don't need to handle the case of // p8 argmunents to the atomic itself - Register VData = MI.getOperand(2 + OpOffset).getReg(); + Register VData = MI.getOperand(2).getReg(); + Register CmpVal; + int OpOffset = 0; if (IsCmpSwap) { - CmpVal = MI.getOperand(3 + OpOffset).getReg(); + CmpVal = MI.getOperand(3).getReg(); ++OpOffset; } castBufferRsrcArgToV4I32(MI, B, 3 + OpOffset); Register RSrc = MI.getOperand(3 + OpOffset).getReg(); - const unsigned NumVIndexOps = (IsCmpSwap ? 8 : 7) + HasReturn; + const unsigned NumVIndexOps = IsCmpSwap ? 9 : 8; // The struct intrinsic variants add one additional operand over raw. const bool HasVIndex = MI.getNumOperands() == NumVIndexOps; @@ -5924,12 +5916,9 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI, unsigned ImmOffset; std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset); - auto MIB = B.buildInstr(getBufferAtomicPseudo(IID)); - - if (HasReturn) - MIB.addDef(Dst); - - MIB.addUse(VData); // vdata + auto MIB = B.buildInstr(getBufferAtomicPseudo(IID)) + .addDef(Dst) + .addUse(VData); // vdata if (IsCmpSwap) MIB.addReg(CmpVal); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 2e4708205523b..9fdd6f04d2a0f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3572,8 +3572,8 @@ def G_AMDGPU_ATOMIC_FMIN : G_ATOMICRMW_OP; def G_AMDGPU_ATOMIC_FMAX : G_ATOMICRMW_OP; } -class BufferAtomicGenericInstruction : AMDGPUGenericInstruction { - let OutOperandList = !if(NoRtn, (outs), (outs type0:$dst)); +class BufferAtomicGenericInstruction : 
AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$vdata, type1:$rsrc, type2:$vindex, type2:$voffset, type2:$soffset, untyped_imm_0:$offset, untyped_imm_0:$cachepolicy, untyped_imm_0:$idxen); From 096eba148df7dcddf9872544fbf510a2c1a9785c Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Tue, 17 Oct 2023 16:54:29 +0300 Subject: [PATCH 336/720] [TargetParser][AMDGPU] Fix getArchEntry(). (#69222) It's supposed to return null when an unknown target id is passed. --- llvm/lib/TargetParser/TargetParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index fb7ede1b37e60..8ab48825d1b96 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -133,7 +133,7 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef Table) { return A.Kind < B.Kind; }); - if (I == Table.end()) + if (I == Table.end() || I->Kind != Search.Kind) return nullptr; return I; } From fc53b1abf7d5e54012ea77a9bc8f6ccb7b487f13 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 17 Oct 2023 10:00:32 -0400 Subject: [PATCH 337/720] [CUDA][HIP] Fix init var diag in temmplate (#69081) Currently clang diagnoses the following code: (https://godbolt.org/z/s8zK3E5P5) but nvcc does not. ` struct A { constexpr A(){} }; struct B { A a; int b; }; template __global__ void kernel( ) { __shared__ B x; } ` Clang generates an implicit trivial ctor for struct B, which should be allowed for initializing a shared variable. However, the body of the ctor is defined only if the template kernel is instantiated. Clang checks the initialization of variable in non-instantiated templates, where it cannot find the body of the ctor, therefore diagnoses it. This patch skips the check for non-instantiated templates. 
--- clang/lib/Sema/SemaCUDA.cpp | 7 +++ .../test/SemaCUDA/Inputs/cuda-initializers.h | 11 +++++ clang/test/SemaCUDA/device-var-init.cu | 48 +++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 7c4083e4ec4d4..d993499cf4a6e 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -632,6 +632,13 @@ bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD, } // namespace void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { + // Return early if VD is inside a non-instantiated template function since + // the implicit constructor is not defined yet. + if (const FunctionDecl *FD = + dyn_cast_or_null(VD->getDeclContext())) + if (FD->isDependentContext()) + return; + // Do not check dependent variables since the ctor/dtor/initializer are not // determined. Do it after instantiation. if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() || diff --git a/clang/test/SemaCUDA/Inputs/cuda-initializers.h b/clang/test/SemaCUDA/Inputs/cuda-initializers.h index 837b726a13e0f..b1e7a1bd48fb5 100644 --- a/clang/test/SemaCUDA/Inputs/cuda-initializers.h +++ b/clang/test/SemaCUDA/Inputs/cuda-initializers.h @@ -143,3 +143,14 @@ struct T_F_NED { struct T_FA_NED { NED ned[2]; }; + +// contexpr empty ctor -- allowed +struct CEEC { + constexpr CEEC() {} +}; + +// Compiler generated trivial ctor -- allowed +struct CGTC { + CEEC ceec; + int a; +}; diff --git a/clang/test/SemaCUDA/device-var-init.cu b/clang/test/SemaCUDA/device-var-init.cu index 9d499bddbe1b3..ee7a9e2276f2d 100644 --- a/clang/test/SemaCUDA/device-var-init.cu +++ b/clang/test/SemaCUDA/device-var-init.cu @@ -31,6 +31,14 @@ __device__ ECD d_ecd_i{}; __shared__ ECD s_ecd_i{}; __constant__ ECD c_ecd_i{}; +__device__ CEEC d_ceec; +__shared__ CEEC s_ceec; +__constant__ CEEC c_ceec; + +__device__ CGTC d_cgtc; +__shared__ CGTC s_cgtc; +__constant__ CGTC c_cgtc; + __device__ EC d_ec_i(3); // expected-error@-1 {{dynamic 
initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables.}} __shared__ EC s_ec_i(3); @@ -213,6 +221,17 @@ __device__ void df_sema() { static const __device__ int cds = 1; static const __constant__ int cdc = 1; + for (int i = 0; i < 10; i++) { + static __device__ CEEC sd_ceec; + static __shared__ CEEC ss_ceec; + static __constant__ CEEC sc_ceec; + __shared__ CEEC s_ceec; + + static __device__ CGTC sd_cgtc; + static __shared__ CGTC ss_cgtc; + static __constant__ CGTC sc_cgtc; + __shared__ CGTC s_cgtc; + } // __shared__ does not need to be explicitly static. __shared__ int lsi; @@ -431,6 +450,35 @@ template __global__ void bar() { __shared__ T bad; // expected-error@-1 {{initialization is not supported for __shared__ variables.}} + for (int i = 0; i < 10; i++) { + static __device__ CEEC sd_ceec; + static __shared__ CEEC ss_ceec; + static __constant__ CEEC sc_ceec; + __shared__ CEEC s_ceec; + + static __device__ CGTC sd_cgtc; + static __shared__ CGTC ss_cgtc; + static __constant__ CGTC sc_cgtc; + __shared__ CGTC s_cgtc; + } +} + +// Check specialization of template function. 
+template <> +__global__ void bar() { + __shared__ NontrivialInitializer bad; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} + for (int i = 0; i < 10; i++) { + static __device__ CEEC sd_ceec; + static __shared__ CEEC ss_ceec; + static __constant__ CEEC sc_ceec; + __shared__ CEEC s_ceec; + + static __device__ CGTC sd_cgtc; + static __shared__ CGTC ss_cgtc; + static __constant__ CGTC sc_cgtc; + __shared__ CGTC s_cgtc; + } } void instantiate() { From 81d8fa5a1d01e1cd00865966957dba74b5e8613f Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Tue, 17 Oct 2023 09:49:08 +0000 Subject: [PATCH 338/720] [Clang][SVE2.1] Add svcntp prototype As described in: https://github.com/ARM-software/acle/pull/257 Patch by : David Sherwood Reviewed By: sdesmalen Differential Revision: https://reviews.llvm.org/D150961 --- clang/include/clang/Basic/arm_sve.td | 2 + clang/include/clang/Basic/arm_sve_sme_incl.td | 1 + clang/lib/Sema/SemaChecking.cpp | 5 + .../acle_sve2p1_cntp.c | 119 ++++++++++++++++++ .../acle_sve2p1_imm.cpp | 18 +++ 5 files changed, 145 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c create mode 100644 clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 894a0a1296b04..07dc8cdece990 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1867,4 +1867,6 @@ def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_ let TargetGuard = "sve2p1" in { def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [], []>; def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>; +def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>; + } diff --git 
a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index 74c9b9266771b..da15f1fb31847 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -246,6 +246,7 @@ def ImmCheck0_3 : ImmCheckType<15>; // 0..3 def ImmCheck0_0 : ImmCheckType<16>; // 0..0 def ImmCheck0_15 : ImmCheckType<17>; // 0..15 def ImmCheck0_255 : ImmCheckType<18>; // 0..255 +def ImmCheck2_4_Mul2 : ImmCheckType<19>; // 2, 4 class ImmCheck { int Arg = arg; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e121da8fac6d9..31b7e6cc8b892 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3120,6 +3120,11 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255)) HasError = true; break; + case SVETypeFlags::ImmCheck2_4_Mul2: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) || + SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2)) + HasError = true; + break; } } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c new file mode 100644 index 0000000000000..18973a6467450 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c @@ -0,0 +1,119 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +// CHECK-LABEL: @test_svcntp_c8_vlx2( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") [[PNN:%.*]], i32 2) +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcntp_c8_vlx2u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") [[PNN:%.*]], i32 2) +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +uint64_t test_svcntp_c8_vlx2(svcount_t pnn) { + return svcntp_c8(pnn, 2); +} + +// CHECK-LABEL: @test_svcntp_c8_vlx4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") [[PNN:%.*]], i32 4) +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcntp_c8_vlx4u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") [[PNN:%.*]], i32 4) +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +uint64_t test_svcntp_c8_vlx4(svcount_t pnn) { + return svcntp_c8(pnn, 4); +} + +// CHECK-LABEL: @test_svcntp_c16_vlx2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount") [[PNN:%.*]], i32 2) +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svcntp_c16_vlx2u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount") [[PNN:%.*]], i32 2) +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +uint64_t test_svcntp_c16_vlx2(svcount_t pnn) { + return svcntp_c16(pnn, 2); +} + +// CHECK-LABEL: @test_svcntp_c16_vlx4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount") [[PNN:%.*]], i32 4) +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svcntp_c16_vlx4u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount") [[PNN:%.*]], 
i32 4) +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +uint64_t test_svcntp_c16_vlx4(svcount_t pnn) { + return svcntp_c16(pnn, 4); +} + +// CHECK-LABEL: @test_svcntp_c32_vlx2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount") [[PNN:%.*]], i32 2) +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svcntp_c32_vlx2u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount") [[PNN:%.*]], i32 2) +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +uint64_t test_svcntp_c32_vlx2(svcount_t pnn) { + return svcntp_c32(pnn, 2); +} + +// CHECK-LABEL: @test_svcntp_c32_vlx4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount") [[PNN:%.*]], i32 4) +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svcntp_c32_vlx4u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount") [[PNN:%.*]], i32 4) +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +uint64_t test_svcntp_c32_vlx4(svcount_t pnn) { + return svcntp_c32(pnn, 4); +} + +// CHECK-LABEL: @test_svcntp_c64_vlx2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount") [[PNN:%.*]], i32 2) +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svcntp_c64_vlx2u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount") [[PNN:%.*]], i32 2) +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +uint64_t test_svcntp_c64_vlx2(svcount_t pnn) { + return svcntp_c64(pnn, 2); +} + +// CHECK-LABEL: @test_svcntp_c64_vlx4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount") [[PNN:%.*]], i32 4) +// CHECK-NEXT: ret i64 [[TMP0]] 
+// +// CPP-CHECK-LABEL: @_Z20test_svcntp_c64_vlx4u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount") [[PNN:%.*]], i32 4) +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +uint64_t test_svcntp_c64_vlx4(svcount_t pnn) { + return svcntp_c64(pnn, 4); +} diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp new file mode 100644 index 0000000000000..781757a2b9c23 --- /dev/null +++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -fsyntax-only -verify %s + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +void test_cntp(svcount_t c) { + svcntp_c8(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}} + svcntp_c16(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}} + svcntp_c32(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}} + svcntp_c64(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}} + + svcntp_c8(c, 3); // expected-error {{argument should be a multiple of 2}} + svcntp_c16(c, 3); // expected-error {{argument should be a multiple of 2}} + svcntp_c32(c, 3); // expected-error {{argument should be a multiple of 2}} + svcntp_c64(c, 3); // expected-error {{argument should be a multiple of 2}} +} + From 088d272e83259a5d8e577a3d2e62012c42a9f9db Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Tue, 17 Oct 2023 15:08:58 +0100 Subject: [PATCH 339/720] [ADT][DebugInfo][RemoveDIs] Add extra bits to ilist_iterator for debug-info ...behind an experimental CMAKE option that's off by default. This patch adds a new ilist-iterator-like class that can carry two extra bits as well as the usual node pointer. 
This is part of the project to remove debug-intrinsics from LLVM: see the rationale here [0], they're needed to signal whether a "position" in a BasicBlock includes any debug-info before or after the iterator. This entirely duplicates ilist_iterator, attempting re-use showed it to be a false economy. It's enable-able through the existing ilist_node options interface, hence a few sites where the instruction-list type needs to be updated. The actual main feature, the extra bits in the class, aren't part of the class unless the cmake flag is given: this is because there's a compile-time cost associated with it, and I'd like to get everything in-tree but off-by-default so that we can do proper comparisons. Nothing actually makes use of this yet, but will do soon, see the Phab patch stack. [0] https://discourse.llvm.org/t/rfc-instruction-api-changes-needed-to-eliminate-debug-intrinsics-from-ir/68939 Differential Revision: https://reviews.llvm.org/D153777 --- llvm/CMakeLists.txt | 3 + llvm/cmake/modules/HandleLLVMOptions.cmake | 4 + llvm/include/llvm/ADT/ilist_iterator.h | 191 +++++++++++++++++++ llvm/include/llvm/ADT/ilist_node.h | 35 +++- llvm/include/llvm/ADT/ilist_node_options.h | 29 ++- llvm/include/llvm/ADT/simple_ilist.h | 16 +- llvm/include/llvm/IR/BasicBlock.h | 28 ++- llvm/include/llvm/IR/GlobalAlias.h | 2 +- llvm/include/llvm/IR/GlobalIFunc.h | 2 +- llvm/include/llvm/IR/GlobalVariable.h | 2 +- llvm/include/llvm/IR/Instruction.h | 23 ++- llvm/include/llvm/IR/Instructions.h | 2 +- llvm/include/llvm/IR/SymbolTableListTraits.h | 16 +- llvm/include/llvm/IR/ValueSymbolTable.h | 5 +- llvm/lib/IR/BasicBlock.cpp | 15 +- llvm/lib/IR/Instruction.cpp | 5 +- llvm/lib/IR/Instructions.cpp | 2 +- llvm/lib/IR/SymbolTableListTraitsImpl.h | 20 +- llvm/unittests/ADT/CMakeLists.txt | 1 + llvm/unittests/ADT/IListIteratorBitsTest.cpp | 138 ++++++++++++++ 20 files changed, 483 insertions(+), 56 deletions(-) create mode 100644 llvm/unittests/ADT/IListIteratorBitsTest.cpp diff --git 
a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 103c08ffbe83b..ef2f2146a0364 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -643,6 +643,9 @@ option(LLVM_USE_OPROFILE option(LLVM_EXTERNALIZE_DEBUGINFO "Generate dSYM files and strip executables and libraries (Darwin Only)" OFF) +option(LLVM_EXPERIMENTAL_DEBUGINFO_ITERATORS + "Add extra Booleans to ilist_iterators to communicate facts for debug-info" OFF) + set(LLVM_CODESIGNING_IDENTITY "" CACHE STRING "Sign executables and dylibs with the given identity or skip if empty (Darwin Only)") diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 19cb881adc3fa..56b63cb5acb81 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -109,6 +109,10 @@ if(LLVM_ENABLE_EXPENSIVE_CHECKS) endif() endif() +if(LLVM_EXPERIMENTAL_DEBUGINFO_ITERATORS) + add_compile_definitions(EXPERIMENTAL_DEBUGINFO_ITERATORS) +endif() + if (LLVM_ENABLE_STRICT_FIXED_SIZE_VECTORS) add_compile_definitions(STRICT_FIXED_SIZE_VECTORS) endif() diff --git a/llvm/include/llvm/ADT/ilist_iterator.h b/llvm/include/llvm/ADT/ilist_iterator.h index be876347907bb..9047b9b73959e 100644 --- a/llvm/include/llvm/ADT/ilist_iterator.h +++ b/llvm/include/llvm/ADT/ilist_iterator.h @@ -175,6 +175,185 @@ class ilist_iterator : ilist_detail::SpecificNodeAccess { bool isEnd() const { return NodePtr ? NodePtr->isSentinel() : false; } }; +/// Iterator for intrusive lists based on ilist_node. Much like ilist_iterator, +/// but with the addition of two bits recording whether this position (when in +/// a range) is half or fully open. 
+template +class ilist_iterator_w_bits : ilist_detail::SpecificNodeAccess { + friend ilist_iterator_w_bits; + friend ilist_iterator_w_bits; + friend ilist_iterator; + + using Traits = ilist_detail::IteratorTraits; + using Access = ilist_detail::SpecificNodeAccess; + +public: + using value_type = typename Traits::value_type; + using pointer = typename Traits::pointer; + using reference = typename Traits::reference; + using difference_type = ptrdiff_t; + using iterator_category = std::bidirectional_iterator_tag; + using const_pointer = typename OptionsT::const_pointer; + using const_reference = typename OptionsT::const_reference; + +private: + using node_pointer = typename Traits::node_pointer; + using node_reference = typename Traits::node_reference; + + node_pointer NodePtr = nullptr; + +#ifdef EXPERIMENTAL_DEBUGINFO_ITERATORS + // (Default: Off) Allow extra position-information flags to be stored + // in iterators, in aid of removing debug-info intrinsics from LLVM. + + /// Is this position intended to contain any debug-info immediately before + /// the position? + mutable bool HeadInclusiveBit = false; + /// Is this position intended to contain any debug-info immediately after + /// the position? + mutable bool TailInclusiveBit = false; +#endif + +public: + /// Create from an ilist_node. + explicit ilist_iterator_w_bits(node_reference N) : NodePtr(&N) {} + + explicit ilist_iterator_w_bits(pointer NP) + : NodePtr(Access::getNodePtr(NP)) {} + explicit ilist_iterator_w_bits(reference NR) + : NodePtr(Access::getNodePtr(&NR)) {} + ilist_iterator_w_bits() = default; + + // This is templated so that we can allow constructing a const iterator from + // a nonconst iterator... 
+ template + ilist_iterator_w_bits( + const ilist_iterator_w_bits &RHS, + std::enable_if_t = nullptr) + : NodePtr(RHS.NodePtr) { +#ifdef EXPERIMENTAL_DEBUGINFO_ITERATORS + HeadInclusiveBit = RHS.HeadInclusiveBit; + TailInclusiveBit = RHS.TailInclusiveBit; +#endif + } + + // This is templated so that we can allow assigning to a const iterator from + // a nonconst iterator... + template + std::enable_if_t + operator=(const ilist_iterator_w_bits &RHS) { + NodePtr = RHS.NodePtr; +#ifdef EXPERIMENTAL_DEBUGINFO_ITERATORS + HeadInclusiveBit = RHS.HeadInclusiveBit; + TailInclusiveBit = RHS.TailInclusiveBit; +#endif + return *this; + } + + /// Explicit conversion between forward/reverse iterators. + /// + /// Translate between forward and reverse iterators without changing range + /// boundaries. The resulting iterator will dereference (and have a handle) + /// to the previous node, which is somewhat unexpected; but converting the + /// two endpoints in a range will give the same range in reverse. + /// + /// This matches std::reverse_iterator conversions. + explicit ilist_iterator_w_bits( + const ilist_iterator_w_bits &RHS) + : ilist_iterator_w_bits(++RHS.getReverse()) {} + + /// Get a reverse iterator to the same node. + /// + /// Gives a reverse iterator that will dereference (and have a handle) to the + /// same node. Converting the endpoint iterators in a range will give a + /// different range; for range operations, use the explicit conversions. + ilist_iterator_w_bits getReverse() const { + if (NodePtr) + return ilist_iterator_w_bits(*NodePtr); + return ilist_iterator_w_bits(); + } + + /// Const-cast. + ilist_iterator_w_bits getNonConst() const { + if (NodePtr) { + auto New = ilist_iterator_w_bits( + const_cast::node_reference>( + *NodePtr)); +#ifdef EXPERIMENTAL_DEBUGINFO_ITERATORS + New.HeadInclusiveBit = HeadInclusiveBit; + New.TailInclusiveBit = TailInclusiveBit; +#endif + return New; + } + return ilist_iterator_w_bits(); + } + + // Accessors... 
+ reference operator*() const { + assert(!NodePtr->isKnownSentinel()); + return *Access::getValuePtr(NodePtr); + } + pointer operator->() const { return &operator*(); } + + // Comparison operators + friend bool operator==(const ilist_iterator_w_bits &LHS, + const ilist_iterator_w_bits &RHS) { + return LHS.NodePtr == RHS.NodePtr; + } + friend bool operator!=(const ilist_iterator_w_bits &LHS, + const ilist_iterator_w_bits &RHS) { + return LHS.NodePtr != RHS.NodePtr; + } + + // Increment and decrement operators... + ilist_iterator_w_bits &operator--() { + NodePtr = IsReverse ? NodePtr->getNext() : NodePtr->getPrev(); +#ifdef EXPERIMENTAL_DEBUGINFO_ITERATORS + HeadInclusiveBit = false; + TailInclusiveBit = false; +#endif + return *this; + } + ilist_iterator_w_bits &operator++() { + NodePtr = IsReverse ? NodePtr->getPrev() : NodePtr->getNext(); +#ifdef EXPERIMENTAL_DEBUGINFO_ITERATORS + HeadInclusiveBit = false; + TailInclusiveBit = false; +#endif + return *this; + } + ilist_iterator_w_bits operator--(int) { + ilist_iterator_w_bits tmp = *this; + --*this; + return tmp; + } + ilist_iterator_w_bits operator++(int) { + ilist_iterator_w_bits tmp = *this; + ++*this; + return tmp; + } + + /// Get the underlying ilist_node. + node_pointer getNodePtr() const { return static_cast(NodePtr); } + + /// Check for end. Only valid if ilist_sentinel_tracking. + bool isEnd() const { return NodePtr ? NodePtr->isSentinel() : false; } + +#ifdef EXPERIMENTAL_DEBUGINFO_ITERATORS + bool getHeadBit() const { return HeadInclusiveBit; } + bool getTailBit() const { return TailInclusiveBit; } + void setHeadBit(bool SetBit) const { HeadInclusiveBit = SetBit; } + void setTailBit(bool SetBit) const { TailInclusiveBit = SetBit; } +#else + // Store and return no information if we're not using this feature. 
+ bool getHeadBit() const { return false; } + bool getTailBit() const { return false; } + void setHeadBit(bool SetBit) const { (void)SetBit; } + void setTailBit(bool SetBit) const { (void)SetBit; } +#endif +}; + template struct simplify_type; /// Allow ilist_iterators to convert into pointers to a node automatically when @@ -192,6 +371,18 @@ template struct simplify_type> : simplify_type> {}; +// ilist_iterator_w_bits should also be accessible via isa/dyn_cast. +template +struct simplify_type> { + using iterator = ilist_iterator_w_bits; + using SimpleType = typename iterator::pointer; + + static SimpleType getSimplifiedValue(const iterator &Node) { return &*Node; } +}; +template +struct simplify_type> + : simplify_type> {}; + } // end namespace llvm #endif // LLVM_ADT_ILIST_ITERATOR_H diff --git a/llvm/include/llvm/ADT/ilist_node.h b/llvm/include/llvm/ADT/ilist_node.h index 7856b1c0d410e..3b6f0dcc7b5e9 100644 --- a/llvm/include/llvm/ADT/ilist_node.h +++ b/llvm/include/llvm/ADT/ilist_node.h @@ -27,8 +27,22 @@ struct NodeAccess; } // end namespace ilist_detail template class ilist_iterator; +template +class ilist_iterator_w_bits; template class ilist_sentinel; +// Selector for which iterator type to pick given the iterator-bits node option. +template +class ilist_select_iterator_type { +public: + using type = ilist_iterator; +}; +template +class ilist_select_iterator_type { +public: + using type = ilist_iterator_w_bits; +}; + /// Implementation for an ilist node. 
/// /// Templated on an appropriate \a ilist_detail::node_options, usually computed @@ -45,16 +59,29 @@ template class ilist_node_impl : OptionsT::node_base_type { friend typename OptionsT::list_base_type; friend struct ilist_detail::NodeAccess; friend class ilist_sentinel; + friend class ilist_iterator; friend class ilist_iterator; friend class ilist_iterator; friend class ilist_iterator; + friend class ilist_iterator_w_bits; + friend class ilist_iterator_w_bits; + friend class ilist_iterator_w_bits; + friend class ilist_iterator_w_bits; protected: - using self_iterator = ilist_iterator; - using const_self_iterator = ilist_iterator; - using reverse_self_iterator = ilist_iterator; - using const_reverse_self_iterator = ilist_iterator; + using self_iterator = + typename ilist_select_iterator_type::type; + using const_self_iterator = + typename ilist_select_iterator_type::type; + using reverse_self_iterator = + typename ilist_select_iterator_type::type; + using const_reverse_self_iterator = + typename ilist_select_iterator_type::type; ilist_node_impl() = default; diff --git a/llvm/include/llvm/ADT/ilist_node_options.h b/llvm/include/llvm/ADT/ilist_node_options.h index 05340d344e399..e6e1068953e36 100644 --- a/llvm/include/llvm/ADT/ilist_node_options.h +++ b/llvm/include/llvm/ADT/ilist_node_options.h @@ -31,6 +31,14 @@ template struct ilist_sentinel_tracking {}; /// simultaneously. See \a ilist_node for usage examples. template struct ilist_tag {}; +/// Option to add extra bits to the ilist_iterator. +/// +/// Some use-cases (debug-info) need to know whether a position is intended +/// to be half-open or fully open, i.e. whether to include any immediately +/// adjacent debug-info in an operation. This option adds two bits to the +/// iterator class to store that information. +template struct ilist_iterator_bits {}; + namespace ilist_detail { /// Helper trait for recording whether an option is specified explicitly. 
@@ -91,6 +99,21 @@ template <> struct extract_tag<> { }; template struct is_valid_option> : std::true_type {}; +/// Extract iterator bits option. +/// +/// Look through \p Options for the \a ilist_iterator_bits option. Defaults +/// to false. +template struct extract_iterator_bits; +template +struct extract_iterator_bits, Options...> + : std::integral_constant {}; +template +struct extract_iterator_bits + : extract_iterator_bits {}; +template <> struct extract_iterator_bits<> : std::false_type, is_implicit {}; +template +struct is_valid_option> : std::true_type {}; + /// Check whether options are valid. /// /// The conjunction of \a is_valid_option on each individual option. @@ -105,7 +128,7 @@ struct check_options /// /// This is usually computed via \a compute_node_options. template + class TagT, bool HasIteratorBits> struct node_options { typedef T value_type; typedef T *pointer; @@ -115,6 +138,7 @@ struct node_options { static const bool enable_sentinel_tracking = EnableSentinelTracking; static const bool is_sentinel_tracking_explicit = IsSentinelTrackingExplicit; + static const bool has_iterator_bits = HasIteratorBits; typedef TagT tag; typedef ilist_node_base node_base_type; typedef ilist_base list_base_type; @@ -123,7 +147,8 @@ struct node_options { template struct compute_node_options { typedef node_options::value, extract_sentinel_tracking::is_explicit, - typename extract_tag::type> + typename extract_tag::type, + extract_iterator_bits::value> type; }; diff --git a/llvm/include/llvm/ADT/simple_ilist.h b/llvm/include/llvm/ADT/simple_ilist.h index 3a96e1ba56575..7236b3fa5a7d2 100644 --- a/llvm/include/llvm/ADT/simple_ilist.h +++ b/llvm/include/llvm/ADT/simple_ilist.h @@ -92,10 +92,18 @@ class simple_ilist using reference = typename OptionsT::reference; using const_pointer = typename OptionsT::const_pointer; using const_reference = typename OptionsT::const_reference; - using iterator = ilist_iterator; - using const_iterator = ilist_iterator; - using 
reverse_iterator = ilist_iterator; - using const_reverse_iterator = ilist_iterator; + using iterator = + typename ilist_select_iterator_type::type; + using const_iterator = + typename ilist_select_iterator_type::type; + using reverse_iterator = + typename ilist_select_iterator_type::type; + using const_reverse_iterator = + typename ilist_select_iterator_type::type; using size_type = size_t; using difference_type = ptrdiff_t; diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h index b031f72493e13..ab291c24e5b6c 100644 --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -55,7 +55,7 @@ class ValueSymbolTable; class BasicBlock final : public Value, // Basic blocks are data objects also public ilist_node_with_parent { public: - using InstListType = SymbolTableList; + using InstListType = SymbolTableList>; private: friend class BlockAddress; @@ -91,11 +91,13 @@ class BasicBlock final : public Value, // Basic blocks are data objects also // These functions and classes need access to the instruction list. friend void Instruction::removeFromParent(); - friend iplist::iterator Instruction::eraseFromParent(); + friend BasicBlock::iterator Instruction::eraseFromParent(); friend BasicBlock::iterator Instruction::insertInto(BasicBlock *BB, BasicBlock::iterator It); - friend class llvm::SymbolTableListTraits; - friend class llvm::ilist_node_with_parent; + friend class llvm::SymbolTableListTraits>; + friend class llvm::ilist_node_with_parent>; /// Creates a new BasicBlock. 
/// @@ -178,7 +180,8 @@ class BasicBlock final : public Value, // Basic blocks are data objects also InstListType::const_iterator getFirstNonPHIIt() const; InstListType::iterator getFirstNonPHIIt() { BasicBlock::iterator It = - static_cast(this)->getFirstNonPHIIt().getNonConst(); + static_cast(this)->getFirstNonPHIIt().getNonConst(); + It.setHeadBit(true); return It; } @@ -332,8 +335,19 @@ class BasicBlock final : public Value, // Basic blocks are data objects also //===--------------------------------------------------------------------===// /// Instruction iterator methods /// - inline iterator begin() { return InstList.begin(); } - inline const_iterator begin() const { return InstList.begin(); } + inline iterator begin() { + iterator It = InstList.begin(); + // Set the head-inclusive bit to indicate that this iterator includes + // any debug-info at the start of the block. This is a no-op unless the + // appropriate CMake flag is set. + It.setHeadBit(true); + return It; + } + inline const_iterator begin() const { + const_iterator It = InstList.begin(); + It.setHeadBit(true); + return It; + } inline iterator end () { return InstList.end(); } inline const_iterator end () const { return InstList.end(); } diff --git a/llvm/include/llvm/IR/GlobalAlias.h b/llvm/include/llvm/IR/GlobalAlias.h index de405da5ca231..583d66e28155d 100644 --- a/llvm/include/llvm/IR/GlobalAlias.h +++ b/llvm/include/llvm/IR/GlobalAlias.h @@ -23,7 +23,7 @@ namespace llvm { class Twine; class Module; -template class SymbolTableListTraits; +template class SymbolTableListTraits; class GlobalAlias : public GlobalValue, public ilist_node { friend class SymbolTableListTraits; diff --git a/llvm/include/llvm/IR/GlobalIFunc.h b/llvm/include/llvm/IR/GlobalIFunc.h index c148ee7907789..4d1982da0baff 100644 --- a/llvm/include/llvm/IR/GlobalIFunc.h +++ b/llvm/include/llvm/IR/GlobalIFunc.h @@ -29,7 +29,7 @@ class Twine; class Module; // Traits class for using GlobalIFunc in symbol table in Module. 
-template class SymbolTableListTraits; +template class SymbolTableListTraits; class GlobalIFunc final : public GlobalObject, public ilist_node { friend class SymbolTableListTraits; diff --git a/llvm/include/llvm/IR/GlobalVariable.h b/llvm/include/llvm/IR/GlobalVariable.h index 03c680e4f9558..f915dba5c6595 100644 --- a/llvm/include/llvm/IR/GlobalVariable.h +++ b/llvm/include/llvm/IR/GlobalVariable.h @@ -33,7 +33,7 @@ namespace llvm { class Constant; class Module; -template class SymbolTableListTraits; +template class SymbolTableListTraits; class DIGlobalVariableExpression; class GlobalVariable : public GlobalObject, public ilist_node { diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 69c3af5b76103..af7aa791cb6da 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -39,7 +39,11 @@ template <> struct ilist_alloc_traits { }; class Instruction : public User, - public ilist_node_with_parent { + public ilist_node_with_parent> { +public: + using InstListType = SymbolTableList>; +private: BasicBlock *Parent; DebugLoc DbgLoc; // 'dbg' Metadata cache. @@ -118,12 +122,12 @@ class Instruction : public User, /// This method unlinks 'this' from the containing basic block and deletes it. /// /// \returns an iterator pointing to the element after the erased one - SymbolTableList::iterator eraseFromParent(); + InstListType::iterator eraseFromParent(); /// Insert an unlinked instruction into a basic block immediately before /// the specified instruction. void insertBefore(Instruction *InsertPos); - void insertBefore(SymbolTableList::iterator InsertPos) { + void insertBefore(InstListType::iterator InsertPos) { insertBefore(&*InsertPos); } @@ -133,11 +137,10 @@ class Instruction : public User, /// Inserts an unlinked instruction into \p ParentBB at position \p It and /// returns the iterator of the inserted instruction. 
- SymbolTableList::iterator - insertInto(BasicBlock *ParentBB, SymbolTableList::iterator It); + InstListType::iterator insertInto(BasicBlock *ParentBB, + InstListType::iterator It); - void insertBefore(BasicBlock &BB, - SymbolTableList::iterator InsertPos) { + void insertBefore(BasicBlock &BB, InstListType::iterator InsertPos) { insertInto(&BB, InsertPos); } @@ -157,10 +160,10 @@ class Instruction : public User, /// Unlink this instruction and insert into BB before I. /// /// \pre I is a valid iterator into BB. - void moveBefore(BasicBlock &BB, SymbolTableList::iterator I); + void moveBefore(BasicBlock &BB, InstListType::iterator I); /// (See other overload for moveBeforePreserving). - void moveBeforePreserving(BasicBlock &BB, SymbolTableList::iterator I) { + void moveBeforePreserving(BasicBlock &BB, InstListType::iterator I) { moveBefore(BB, I); } @@ -902,7 +905,7 @@ class Instruction : public User, }; private: - friend class SymbolTableListTraits; + friend class SymbolTableListTraits>; friend class BasicBlock; // For renumbering. // Shadow Value::setValueSubclassData with a private forwarding method so that diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index c85727ce30a94..af6ac566a0192 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -3661,7 +3661,7 @@ class SwitchInstProfUpdateWrapper { /// Delegate the call to the underlying SwitchInst::eraseFromParent() and mark /// this object to not touch the underlying SwitchInst in destructor. 
- SymbolTableList::iterator eraseFromParent(); + Instruction::InstListType::iterator eraseFromParent(); void setSuccessorWeight(unsigned idx, CaseWeightOpt W); CaseWeightOpt getSuccessorWeight(unsigned idx); diff --git a/llvm/include/llvm/IR/SymbolTableListTraits.h b/llvm/include/llvm/IR/SymbolTableListTraits.h index 8af712374bfaf..bd31fca5e525b 100644 --- a/llvm/include/llvm/IR/SymbolTableListTraits.h +++ b/llvm/include/llvm/IR/SymbolTableListTraits.h @@ -57,15 +57,16 @@ DEFINE_SYMBOL_TABLE_PARENT_TYPE(GlobalAlias, Module) DEFINE_SYMBOL_TABLE_PARENT_TYPE(GlobalIFunc, Module) #undef DEFINE_SYMBOL_TABLE_PARENT_TYPE -template class SymbolTableList; +template class SymbolTableList; // ValueSubClass - The type of objects that I hold, e.g. Instruction. // ItemParentClass - The type of object that owns the list, e.g. BasicBlock. +// OptionsT - Extra options to ilist nodes. // -template +template class SymbolTableListTraits : public ilist_alloc_traits { - using ListTy = SymbolTableList; - using iterator = typename simple_ilist::iterator; + using ListTy = SymbolTableList; + using iterator = typename simple_ilist::iterator; using ItemParentClass = typename SymbolTableListParentType::type; @@ -110,9 +111,10 @@ class SymbolTableListTraits : public ilist_alloc_traits { /// When nodes are inserted into and removed from this list, the associated /// symbol table will be automatically updated. Similarly, parent links get /// updated automatically. 
-template -class SymbolTableList - : public iplist_impl, SymbolTableListTraits> {}; +template +class SymbolTableList : public iplist_impl, + SymbolTableListTraits> { +}; } // end namespace llvm diff --git a/llvm/include/llvm/IR/ValueSymbolTable.h b/llvm/include/llvm/IR/ValueSymbolTable.h index 43d00268f4b22..6350f6a2435e4 100644 --- a/llvm/include/llvm/IR/ValueSymbolTable.h +++ b/llvm/include/llvm/IR/ValueSymbolTable.h @@ -27,8 +27,9 @@ class GlobalAlias; class GlobalIFunc; class GlobalVariable; class Instruction; +template struct ilist_iterator_bits; template class SmallString; -template class SymbolTableListTraits; +template class SymbolTableListTraits; /// This class provides a symbol table of name/value pairs. It is essentially /// a std::map but has a controlled interface provided by @@ -41,7 +42,7 @@ class ValueSymbolTable { friend class SymbolTableListTraits; friend class SymbolTableListTraits; friend class SymbolTableListTraits; - friend class SymbolTableListTraits; + friend class SymbolTableListTraits>; friend class Value; /// @name Types diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index d6677aa721bb0..46b1a3b37132b 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -42,7 +42,8 @@ template <> void llvm::invalidateParentIListOrdering(BasicBlock *BB) { // Explicit instantiation of SymbolTableListTraits since some of the methods // are not in the public header file... 
-template class llvm::SymbolTableListTraits; +template class llvm::SymbolTableListTraits>; BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent, BasicBlock *InsertBefore) @@ -221,7 +222,13 @@ const Instruction* BasicBlock::getFirstNonPHI() const { } BasicBlock::const_iterator BasicBlock::getFirstNonPHIIt() const { - return getFirstNonPHI()->getIterator(); + const Instruction *I = getFirstNonPHI(); + BasicBlock::const_iterator It = I->getIterator(); + // Set the head-inclusive bit to indicate that this iterator includes + // any debug-info at the start of the block. This is a no-op unless the + // appropriate CMake flag is set. + It.setHeadBit(true); + return It; } const Instruction *BasicBlock::getFirstNonPHIOrDbg(bool SkipPseudoOp) const { @@ -261,6 +268,10 @@ BasicBlock::const_iterator BasicBlock::getFirstInsertionPt() const { const_iterator InsertPt = FirstNonPHI->getIterator(); if (InsertPt->isEHPad()) ++InsertPt; + // Set the head-inclusive bit to indicate that this iterator includes + // any debug-info at the start of the block. This is a no-op unless the + // appropriate CMake flag is set. 
+ InsertPt.setHeadBit(true); return InsertPt; } diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index b497951a598cc..9b176eb78888e 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -80,7 +80,7 @@ void Instruction::removeFromParent() { getParent()->getInstList().remove(getIterator()); } -iplist::iterator Instruction::eraseFromParent() { +BasicBlock::iterator Instruction::eraseFromParent() { return getParent()->getInstList().erase(getIterator()); } @@ -114,8 +114,7 @@ void Instruction::moveAfter(Instruction *MovePos) { moveBefore(*MovePos->getParent(), ++MovePos->getIterator()); } -void Instruction::moveBefore(BasicBlock &BB, - SymbolTableList::iterator I) { +void Instruction::moveBefore(BasicBlock &BB, InstListType::iterator I) { assert(I == BB.end() || I->getParent() == &BB); BB.splice(I, getParent(), getIterator()); } diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index ece3b58792dd1..2ea9c05de6be2 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -4606,7 +4606,7 @@ void SwitchInstProfUpdateWrapper::addCase( "num of prof branch_weights must accord with num of successors"); } -SymbolTableList::iterator +Instruction::InstListType::iterator SwitchInstProfUpdateWrapper::eraseFromParent() { // Instruction is erased. Mark as unchanged to not touch it in the destructor. Changed = false; diff --git a/llvm/lib/IR/SymbolTableListTraitsImpl.h b/llvm/lib/IR/SymbolTableListTraitsImpl.h index 4283744bd058d..990552f9b65a1 100644 --- a/llvm/lib/IR/SymbolTableListTraitsImpl.h +++ b/llvm/lib/IR/SymbolTableListTraitsImpl.h @@ -28,10 +28,10 @@ template <> void invalidateParentIListOrdering(BasicBlock *BB); /// setSymTabObject - This is called when (f.e.) the parent of a basic block /// changes. This requires us to remove all the instruction symtab entries from /// the current function and reinsert them into the new function. 
-template +template template -void SymbolTableListTraits::setSymTabObject(TPtr *Dest, - TPtr Src) { +void SymbolTableListTraits::setSymTabObject(TPtr *Dest, + TPtr Src) { // Get the old symtab and value list before doing the assignment. ValueSymbolTable *OldST = getSymTab(getListOwner()); @@ -61,11 +61,11 @@ void SymbolTableListTraits::setSymTabObject(TPtr *Dest, if (I->hasName()) NewST->reinsertValue(&*I); } - } -template -void SymbolTableListTraits::addNodeToList(ValueSubClass *V) { +template +void SymbolTableListTraits::addNodeToList( + ValueSubClass *V) { assert(!V->getParent() && "Value already in a container!!"); ItemParentClass *Owner = getListOwner(); V->setParent(Owner); @@ -75,8 +75,8 @@ void SymbolTableListTraits::addNodeToList(ValueSubClass *V) { ST->reinsertValue(V); } -template -void SymbolTableListTraits::removeNodeFromList( +template +void SymbolTableListTraits::removeNodeFromList( ValueSubClass *V) { V->setParent(nullptr); if (V->hasName()) @@ -84,8 +84,8 @@ void SymbolTableListTraits::removeNodeFromList( ST->removeValueName(V->getValueName()); } -template -void SymbolTableListTraits::transferNodesFromList( +template +void SymbolTableListTraits::transferNodesFromList( SymbolTableListTraits &L2, iterator first, iterator last) { // Transfering nodes, even within the same BB, invalidates the ordering. The // list that we removed the nodes from still has a valid ordering. 
diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt index 42634cef6d301..12d7325036bf0 100644 --- a/llvm/unittests/ADT/CMakeLists.txt +++ b/llvm/unittests/ADT/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_unittest(ADTTests HashingTest.cpp IListBaseTest.cpp IListIteratorTest.cpp + IListIteratorBitsTest.cpp IListNodeBaseTest.cpp IListNodeTest.cpp IListSentinelTest.cpp diff --git a/llvm/unittests/ADT/IListIteratorBitsTest.cpp b/llvm/unittests/ADT/IListIteratorBitsTest.cpp new file mode 100644 index 0000000000000..167b30a5e3085 --- /dev/null +++ b/llvm/unittests/ADT/IListIteratorBitsTest.cpp @@ -0,0 +1,138 @@ +//==- unittests/ADT/IListIteratorBitsTest.cpp - ilist_iterator_w_bits tests -=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/simple_ilist.h" +#include "gtest/gtest.h" + +// Test that ilist_iterator_w_bits can be used to store extra information about +// what we're iterating over, that it's only enabled when given the relevant +// option, and it can be fed into various iteration utilities. 
+ +using namespace llvm; + +namespace { + +class dummy; + +struct Node : ilist_node> { + friend class dummy; +}; + +struct PlainNode : ilist_node { + friend class dummy; +}; + +TEST(IListIteratorBitsTest, DefaultConstructor) { + simple_ilist>::iterator I; + simple_ilist>::reverse_iterator RI; + simple_ilist>::const_iterator CI; + simple_ilist>::const_reverse_iterator CRI; + EXPECT_EQ(nullptr, I.getNodePtr()); + EXPECT_EQ(nullptr, CI.getNodePtr()); + EXPECT_EQ(nullptr, RI.getNodePtr()); + EXPECT_EQ(nullptr, CRI.getNodePtr()); + EXPECT_EQ(I, I); + EXPECT_EQ(I, CI); + EXPECT_EQ(CI, I); + EXPECT_EQ(CI, CI); + EXPECT_EQ(RI, RI); + EXPECT_EQ(RI, CRI); + EXPECT_EQ(CRI, RI); + EXPECT_EQ(CRI, CRI); + EXPECT_EQ(I, RI.getReverse()); + EXPECT_EQ(RI, I.getReverse()); +} + +TEST(IListIteratorBitsTest, ConsAndAssignment) { + simple_ilist> L; + Node A; + L.insert(L.end(), A); + + simple_ilist>::iterator I, I2; + +// Two sets of tests: if we've compiled in the iterator bits, then check that +// HeadInclusiveBit and TailInclusiveBit are preserved on assignment and copy +// construction, but not on other operations. +#ifdef EXPERIMENTAL_DEBUGINFO_ITERATORS + I = L.begin(); + EXPECT_FALSE(I.getHeadBit()); + EXPECT_FALSE(I.getTailBit()); + I.setHeadBit(true); + I.setTailBit(true); + EXPECT_TRUE(I.getHeadBit()); + EXPECT_TRUE(I.getTailBit()); + + ++I; + + EXPECT_FALSE(I.getHeadBit()); + EXPECT_FALSE(I.getTailBit()); + + I = L.begin(); + I.setHeadBit(true); + I.setTailBit(true); + I2 = I; + EXPECT_TRUE(I2.getHeadBit()); + EXPECT_TRUE(I2.getTailBit()); + + I = L.begin(); + I.setHeadBit(true); + I.setTailBit(true); + simple_ilist>::iterator I3(I); + EXPECT_TRUE(I3.getHeadBit()); + EXPECT_TRUE(I3.getTailBit()); +#else + // The calls should be available, but shouldn't actually store information. 
+ I = L.begin(); + EXPECT_FALSE(I.getHeadBit()); + EXPECT_FALSE(I.getTailBit()); + I.setHeadBit(true); + I.setTailBit(true); + EXPECT_FALSE(I.getHeadBit()); + EXPECT_FALSE(I.getTailBit()); + // Suppress warnings as we don't test with this variable. + (void)I2; +#endif +} + +class dummy { + // Test that we get an ilist_iterator_w_bits out of the node given that the + // options are enabled. + using node_options = typename ilist_detail::compute_node_options< + Node, ilist_iterator_bits>::type; + static_assert(std::is_same>::value); + + // Now test that a plain node, without the option, gets a plain + // ilist_iterator. + using plain_node_options = + typename ilist_detail::compute_node_options::type; + static_assert(std::is_same< + PlainNode::self_iterator, + llvm::ilist_iterator>::value); +}; + +TEST(IListIteratorBitsTest, RangeIteration) { + // Check that we can feed ilist_iterator_w_bits into make_range and similar. + // Plus, we should be able to convert it to a reverse iterator and use that. 
+ simple_ilist> L; + Node A; + L.insert(L.end(), A); + + for (Node &N : make_range(L.begin(), L.end())) + (void)N; + + simple_ilist>::iterator It = + L.begin()->getIterator(); + auto RevIt = It.getReverse(); + + for (Node &N : make_range(RevIt, L.rend())) + (void)N; +} + +} // end namespace From b2773d170cb4bdb4b19ba801b5eb55395024b3ae Mon Sep 17 00:00:00 2001 From: Weining Lu Date: Tue, 17 Oct 2023 17:41:32 +0800 Subject: [PATCH 340/720] [LoongArch] Precommit a test for atomic cmpxchg optmization --- .../ir-instruction/atomic-cmpxchg.ll | 383 +++++++++++------- 1 file changed, 243 insertions(+), 140 deletions(-) diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll index 2f764fd831ee2..76f9ebed0d93b 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll @@ -104,6 +104,109 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ret void } +define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a3 +; LA64-NEXT: andi $a2, $a2, 255 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a3, $a4, $a3 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: and $a5, $a4, $a3 +; LA64-NEXT: bne $a5, $a1, .LBB4_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; LA64-NEXT: andn $a5, $a4, $a3 +; LA64-NEXT: or $a5, $a5, $a2 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB4_1 +; LA64-NEXT: b .LBB4_4 +; LA64-NEXT: .LBB4_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB4_4: 
+; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic + ret void +} + +define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a3 +; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a3, $a4, $a3 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: and $a5, $a4, $a3 +; LA64-NEXT: bne $a5, $a1, .LBB5_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; LA64-NEXT: andn $a5, $a4, $a3 +; LA64-NEXT: or $a5, $a5, $a2 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB5_1 +; LA64-NEXT: b .LBB5_4 +; LA64-NEXT: .LBB5_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB5_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic + ret void +} + +define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB6_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.w $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB6_1 +; LA64-NEXT: b .LBB6_4 +; LA64-NEXT: .LBB6_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB6_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic + ret void +} + +define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: .LBB7_1: # =>This Inner Loop 
Header: Depth=1 +; LA64-NEXT: ll.d $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB7_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.d $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB7_1 +; LA64-NEXT: b .LBB7_4 +; LA64-NEXT: .LBB7_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: .LBB7_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic + ret void +} + define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: ; LA64: # %bb.0: @@ -118,19 +221,19 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind ; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 -; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a6, $a5, $a4 -; LA64-NEXT: bne $a6, $a1, .LBB4_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; LA64-NEXT: bne $a6, $a1, .LBB8_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a4 ; LA64-NEXT: or $a6, $a6, $a2 ; LA64-NEXT: sc.w $a6, $a0, 0 -; LA64-NEXT: beqz $a6, .LBB4_1 -; LA64-NEXT: b .LBB4_4 -; LA64-NEXT: .LBB4_3: +; LA64-NEXT: beqz $a6, .LBB8_1 +; LA64-NEXT: b .LBB8_4 +; LA64-NEXT: .LBB8_3: ; LA64-NEXT: dbar 20 -; LA64-NEXT: .LBB4_4: +; LA64-NEXT: .LBB8_4: ; LA64-NEXT: srl.w $a0, $a5, $a3 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire @@ -153,19 +256,19 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou ; LA64-NEXT: sll.w $a1, $a1, $a4 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 -; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a6, $a5, $a3 -; LA64-NEXT: bne $a6, $a1, .LBB5_3 -; LA64-NEXT: # 
%bb.2: # in Loop: Header=BB5_1 Depth=1 +; LA64-NEXT: bne $a6, $a1, .LBB9_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a3 ; LA64-NEXT: or $a6, $a6, $a2 ; LA64-NEXT: sc.w $a6, $a0, 0 -; LA64-NEXT: beqz $a6, .LBB5_1 -; LA64-NEXT: b .LBB5_4 -; LA64-NEXT: .LBB5_3: +; LA64-NEXT: beqz $a6, .LBB9_1 +; LA64-NEXT: b .LBB9_4 +; LA64-NEXT: .LBB9_3: ; LA64-NEXT: dbar 20 -; LA64-NEXT: .LBB5_4: +; LA64-NEXT: .LBB9_4: ; LA64-NEXT: srl.w $a0, $a5, $a4 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire @@ -176,17 +279,17 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: ; LA64: # %bb.0: -; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB6_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; LA64-NEXT: bne $a3, $a1, .LBB10_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.w $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB6_1 -; LA64-NEXT: b .LBB6_4 -; LA64-NEXT: .LBB6_3: +; LA64-NEXT: beqz $a4, .LBB10_1 +; LA64-NEXT: b .LBB10_4 +; LA64-NEXT: .LBB10_3: ; LA64-NEXT: dbar 20 -; LA64-NEXT: .LBB6_4: +; LA64-NEXT: .LBB10_4: ; LA64-NEXT: move $a0, $a3 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire @@ -197,17 +300,17 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: ; LA64: # %bb.0: -; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.d $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB7_3 -; LA64-NEXT: # 
%bb.2: # in Loop: Header=BB7_1 Depth=1 +; LA64-NEXT: bne $a3, $a1, .LBB11_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.d $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB7_1 -; LA64-NEXT: b .LBB7_4 -; LA64-NEXT: .LBB7_3: +; LA64-NEXT: beqz $a4, .LBB11_1 +; LA64-NEXT: b .LBB11_4 +; LA64-NEXT: .LBB11_3: ; LA64-NEXT: dbar 20 -; LA64-NEXT: .LBB7_4: +; LA64-NEXT: .LBB11_4: ; LA64-NEXT: move $a0, $a3 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire @@ -229,19 +332,19 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: addi.w $a3, $a4, 0 -; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a6, $a5, $a3 -; LA64-NEXT: bne $a6, $a1, .LBB8_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; LA64-NEXT: bne $a6, $a1, .LBB12_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a3 ; LA64-NEXT: or $a6, $a6, $a2 ; LA64-NEXT: sc.w $a6, $a0, 0 -; LA64-NEXT: beqz $a6, .LBB8_1 -; LA64-NEXT: b .LBB8_4 -; LA64-NEXT: .LBB8_3: +; LA64-NEXT: beqz $a6, .LBB12_1 +; LA64-NEXT: b .LBB12_4 +; LA64-NEXT: .LBB12_3: ; LA64-NEXT: dbar 20 -; LA64-NEXT: .LBB8_4: +; LA64-NEXT: .LBB12_4: ; LA64-NEXT: and $a0, $a5, $a4 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: xor $a0, $a1, $a0 @@ -267,19 +370,19 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: addi.w $a4, $a3, 0 -; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a6, $a5, $a4 -; LA64-NEXT: bne $a6, $a1, .LBB9_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +; LA64-NEXT: 
bne $a6, $a1, .LBB13_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a4 ; LA64-NEXT: or $a6, $a6, $a2 ; LA64-NEXT: sc.w $a6, $a0, 0 -; LA64-NEXT: beqz $a6, .LBB9_1 -; LA64-NEXT: b .LBB9_4 -; LA64-NEXT: .LBB9_3: +; LA64-NEXT: beqz $a6, .LBB13_1 +; LA64-NEXT: b .LBB13_4 +; LA64-NEXT: .LBB13_3: ; LA64-NEXT: dbar 20 -; LA64-NEXT: .LBB9_4: +; LA64-NEXT: .LBB13_4: ; LA64-NEXT: and $a0, $a5, $a3 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: xor $a0, $a1, $a0 @@ -293,17 +396,17 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: ; LA64: # %bb.0: -; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB10_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; LA64-NEXT: bne $a3, $a1, .LBB14_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.w $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB10_1 -; LA64-NEXT: b .LBB10_4 -; LA64-NEXT: .LBB10_3: +; LA64-NEXT: beqz $a4, .LBB14_1 +; LA64-NEXT: b .LBB14_4 +; LA64-NEXT: .LBB14_3: ; LA64-NEXT: dbar 20 -; LA64-NEXT: .LBB10_4: +; LA64-NEXT: .LBB14_4: ; LA64-NEXT: addi.w $a0, $a1, 0 ; LA64-NEXT: xor $a0, $a3, $a0 ; LA64-NEXT: sltui $a0, $a0, 1 @@ -316,17 +419,17 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: ; LA64: # %bb.0: -; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.d $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB11_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; LA64-NEXT: bne $a3, $a1, 
.LBB15_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.d $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB11_1 -; LA64-NEXT: b .LBB11_4 -; LA64-NEXT: .LBB11_3: +; LA64-NEXT: beqz $a4, .LBB15_1 +; LA64-NEXT: b .LBB15_4 +; LA64-NEXT: .LBB15_3: ; LA64-NEXT: dbar 20 -; LA64-NEXT: .LBB11_4: +; LA64-NEXT: .LBB15_4: ; LA64-NEXT: xor $a0, $a3, $a1 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret @@ -349,19 +452,19 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind ; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 -; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: and $a5, $a4, $a3 -; LA64-NEXT: bne $a5, $a1, .LBB12_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; LA64-NEXT: bne $a5, $a1, .LBB16_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 ; LA64-NEXT: andn $a5, $a4, $a3 ; LA64-NEXT: or $a5, $a5, $a2 ; LA64-NEXT: sc.w $a5, $a0, 0 -; LA64-NEXT: beqz $a5, .LBB12_1 -; LA64-NEXT: b .LBB12_4 -; LA64-NEXT: .LBB12_3: +; LA64-NEXT: beqz $a5, .LBB16_1 +; LA64-NEXT: b .LBB16_4 +; LA64-NEXT: .LBB16_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB12_4: +; LA64-NEXT: .LBB16_4: ; LA64-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ret void @@ -382,19 +485,19 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw ; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 -; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: and $a5, $a4, $a3 -; LA64-NEXT: bne $a5, $a1, .LBB13_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; LA64-NEXT: bne $a5, $a1, .LBB17_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 ; 
LA64-NEXT: andn $a5, $a4, $a3 ; LA64-NEXT: or $a5, $a5, $a2 ; LA64-NEXT: sc.w $a5, $a0, 0 -; LA64-NEXT: beqz $a5, .LBB13_1 -; LA64-NEXT: b .LBB13_4 -; LA64-NEXT: .LBB13_3: +; LA64-NEXT: beqz $a5, .LBB17_1 +; LA64-NEXT: b .LBB17_4 +; LA64-NEXT: .LBB17_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB13_4: +; LA64-NEXT: .LBB17_4: ; LA64-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic ret void @@ -403,17 +506,17 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB14_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; LA64-NEXT: bne $a3, $a1, .LBB18_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.w $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB14_1 -; LA64-NEXT: b .LBB14_4 -; LA64-NEXT: .LBB14_3: +; LA64-NEXT: beqz $a4, .LBB18_1 +; LA64-NEXT: b .LBB18_4 +; LA64-NEXT: .LBB18_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB14_4: +; LA64-NEXT: .LBB18_4: ; LA64-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ret void @@ -422,17 +525,17 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.d $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB15_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; LA64-NEXT: bne $a3, $a1, .LBB19_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 ; LA64-NEXT: move $a4, 
$a2 ; LA64-NEXT: sc.d $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB15_1 -; LA64-NEXT: b .LBB15_4 -; LA64-NEXT: .LBB15_3: +; LA64-NEXT: beqz $a4, .LBB19_1 +; LA64-NEXT: b .LBB19_4 +; LA64-NEXT: .LBB19_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB15_4: +; LA64-NEXT: .LBB19_4: ; LA64-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ret void @@ -452,19 +555,19 @@ define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) noun ; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 -; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a6, $a5, $a4 -; LA64-NEXT: bne $a6, $a1, .LBB16_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; LA64-NEXT: bne $a6, $a1, .LBB20_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a4 ; LA64-NEXT: or $a6, $a6, $a2 ; LA64-NEXT: sc.w $a6, $a0, 0 -; LA64-NEXT: beqz $a6, .LBB16_1 -; LA64-NEXT: b .LBB16_4 -; LA64-NEXT: .LBB16_3: +; LA64-NEXT: beqz $a6, .LBB20_1 +; LA64-NEXT: b .LBB20_4 +; LA64-NEXT: .LBB20_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB16_4: +; LA64-NEXT: .LBB20_4: ; LA64-NEXT: srl.w $a0, $a5, $a3 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic @@ -487,19 +590,19 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) ; LA64-NEXT: sll.w $a1, $a1, $a4 ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 -; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a6, $a5, $a3 -; LA64-NEXT: bne $a6, $a1, .LBB17_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 +; LA64-NEXT: bne $a6, $a1, .LBB21_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a3 ; LA64-NEXT: or 
$a6, $a6, $a2 ; LA64-NEXT: sc.w $a6, $a0, 0 -; LA64-NEXT: beqz $a6, .LBB17_1 -; LA64-NEXT: b .LBB17_4 -; LA64-NEXT: .LBB17_3: +; LA64-NEXT: beqz $a6, .LBB21_1 +; LA64-NEXT: b .LBB21_4 +; LA64-NEXT: .LBB21_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB17_4: +; LA64-NEXT: .LBB21_4: ; LA64-NEXT: srl.w $a0, $a5, $a4 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic @@ -510,17 +613,17 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: ; LA64: # %bb.0: -; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB18_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 +; LA64-NEXT: bne $a3, $a1, .LBB22_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.w $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB18_1 -; LA64-NEXT: b .LBB18_4 -; LA64-NEXT: .LBB18_3: +; LA64-NEXT: beqz $a4, .LBB22_1 +; LA64-NEXT: b .LBB22_4 +; LA64-NEXT: .LBB22_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB18_4: +; LA64-NEXT: .LBB22_4: ; LA64-NEXT: move $a0, $a3 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic @@ -531,17 +634,17 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: ; LA64: # %bb.0: -; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.d $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB19_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 +; LA64-NEXT: bne $a3, $a1, .LBB23_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ; 
LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.d $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB19_1 -; LA64-NEXT: b .LBB19_4 -; LA64-NEXT: .LBB19_3: +; LA64-NEXT: beqz $a4, .LBB23_1 +; LA64-NEXT: b .LBB23_4 +; LA64-NEXT: .LBB23_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB19_4: +; LA64-NEXT: .LBB23_4: ; LA64-NEXT: move $a0, $a3 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic @@ -563,19 +666,19 @@ define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) noun ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: addi.w $a3, $a4, 0 -; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a6, $a5, $a3 -; LA64-NEXT: bne $a6, $a1, .LBB20_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; LA64-NEXT: bne $a6, $a1, .LBB24_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a3 ; LA64-NEXT: or $a6, $a6, $a2 ; LA64-NEXT: sc.w $a6, $a0, 0 -; LA64-NEXT: beqz $a6, .LBB20_1 -; LA64-NEXT: b .LBB20_4 -; LA64-NEXT: .LBB20_3: +; LA64-NEXT: beqz $a6, .LBB24_1 +; LA64-NEXT: b .LBB24_4 +; LA64-NEXT: .LBB24_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB20_4: +; LA64-NEXT: .LBB24_4: ; LA64-NEXT: and $a0, $a5, $a4 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: xor $a0, $a1, $a0 @@ -601,19 +704,19 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: addi.w $a4, $a3, 0 -; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a6, $a5, $a4 -; LA64-NEXT: bne $a6, $a1, .LBB21_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; LA64-NEXT: bne $a6, $a1, .LBB25_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 ; LA64-NEXT: andn $a6, $a5, $a4 
; LA64-NEXT: or $a6, $a6, $a2 ; LA64-NEXT: sc.w $a6, $a0, 0 -; LA64-NEXT: beqz $a6, .LBB21_1 -; LA64-NEXT: b .LBB21_4 -; LA64-NEXT: .LBB21_3: +; LA64-NEXT: beqz $a6, .LBB25_1 +; LA64-NEXT: b .LBB25_4 +; LA64-NEXT: .LBB25_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB21_4: +; LA64-NEXT: .LBB25_4: ; LA64-NEXT: and $a0, $a5, $a3 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: xor $a0, $a1, $a0 @@ -627,17 +730,17 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: ; LA64: # %bb.0: -; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB22_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; LA64-NEXT: bne $a3, $a1, .LBB26_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.w $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB22_1 -; LA64-NEXT: b .LBB22_4 -; LA64-NEXT: .LBB22_3: +; LA64-NEXT: beqz $a4, .LBB26_1 +; LA64-NEXT: b .LBB26_4 +; LA64-NEXT: .LBB26_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB22_4: +; LA64-NEXT: .LBB26_4: ; LA64-NEXT: addi.w $a0, $a1, 0 ; LA64-NEXT: xor $a0, $a3, $a0 ; LA64-NEXT: sltui $a0, $a0, 1 @@ -650,17 +753,17 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: ; LA64: # %bb.0: -; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.d $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB23_3 -; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; LA64-NEXT: bne $a3, $a1, .LBB27_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 ; LA64-NEXT: move $a4, $a2 
; LA64-NEXT: sc.d $a4, $a0, 0 -; LA64-NEXT: beqz $a4, .LBB23_1 -; LA64-NEXT: b .LBB23_4 -; LA64-NEXT: .LBB23_3: +; LA64-NEXT: beqz $a4, .LBB27_1 +; LA64-NEXT: b .LBB27_4 +; LA64-NEXT: .LBB27_3: ; LA64-NEXT: dbar 1792 -; LA64-NEXT: .LBB23_4: +; LA64-NEXT: .LBB27_4: ; LA64-NEXT: xor $a0, $a3, $a1 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret From 791b890c468e5784113507f1f2fe7fed694c3962 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 17 Oct 2023 15:42:28 +0100 Subject: [PATCH 341/720] [HIP][Clang][CodeGen] Simplify test for `hipstdpar` Fixes build failures for cases where there's no additional visibility / linkage spec. Differential Revision: https://reviews.llvm.org/D155850 --- .../unannotated-functions-get-emitted.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp b/clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp index 1fa37ea6c342f..dfd6b3da0a291 100644 --- a/clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp +++ b/clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp @@ -6,14 +6,14 @@ #define __device__ __attribute__((device)) -// NO-HIPSTDPAR-DEV-NOT: define {{.*}} void @foo({{.*}}) -// HIPSTDPAR-DEV: define {{.*}} void @foo({{.*}}) +// NO-HIPSTDPAR-DEV-NOT: {{.*}}void @foo({{.*}}) +// HIPSTDPAR-DEV: {{.*}}void @foo({{.*}}) extern "C" void foo(float *a, float b) { *a = b; } -// NO-HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}}) -// HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}}) +// NO-HIPSTDPAR-DEV: {{.*}}void @bar({{.*}}) +// HIPSTDPAR-DEV: {{.*}}void @bar({{.*}}) extern "C" __device__ void bar(float *a, float b) { *a = b; } From c38598186bbc442882610ee15ca4fd9ec022c9c8 Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Tue, 17 Oct 2023 11:43:15 -0300 Subject: [PATCH 342/720] [flang] Fix constant subscript operations (#68352) Modify ConstantBounds' methods that handle subscripts and bounds to avoid integer overflows. 
This is needed to properly handle arrays with the maximum possible upper bound (INT64_MAX). --- flang/lib/Evaluate/constant.cpp | 10 +++++----- flang/test/Evaluate/folding08.f90 | 6 ++++++ flang/test/Semantics/reshape.f90 | 5 +++++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/flang/lib/Evaluate/constant.cpp b/flang/lib/Evaluate/constant.cpp index 084836b4ec367..0e0d412118d3b 100644 --- a/flang/lib/Evaluate/constant.cpp +++ b/flang/lib/Evaluate/constant.cpp @@ -36,11 +36,11 @@ ConstantSubscripts ConstantBounds::ComputeUbounds( std::optional dim) const { if (dim) { CHECK(*dim < Rank()); - return {lbounds_[*dim] + shape_[*dim] - 1}; + return {lbounds_[*dim] + (shape_[*dim] - 1)}; } else { ConstantSubscripts ubounds(Rank()); for (int i{0}; i < Rank(); ++i) { - ubounds[i] = lbounds_[i] + shape_[i] - 1; + ubounds[i] = lbounds_[i] + (shape_[i] - 1); } return ubounds; } @@ -73,7 +73,7 @@ ConstantSubscript ConstantBounds::SubscriptsToOffset( for (auto j : index) { auto lb{lbounds_[dim]}; auto extent{shape_[dim++]}; - CHECK(j >= lb && j < lb + extent); + CHECK(j >= lb && j - lb < extent); offset += stride * (j - lb); stride *= extent; } @@ -93,10 +93,10 @@ bool ConstantBounds::IncrementSubscripts( ConstantSubscript k{dimOrder ? (*dimOrder)[j] : j}; auto lb{lbounds_[k]}; CHECK(indices[k] >= lb); - if (++indices[k] < lb + shape_[k]) { + if (++indices[k] - lb < shape_[k]) { return true; } else { - CHECK(indices[k] == lb + std::max(shape_[k], 1)); + CHECK(indices[k] - lb == std::max(shape_[k], 1)); indices[k] = lb; } } diff --git a/flang/test/Evaluate/folding08.f90 b/flang/test/Evaluate/folding08.f90 index 8c5296e889747..1b2e5605e85d4 100644 --- a/flang/test/Evaluate/folding08.f90 +++ b/flang/test/Evaluate/folding08.f90 @@ -146,4 +146,10 @@ subroutine test4_bound_parentheses logical, parameter :: test_ubpa4_dim = ubound((pa4), 1) == 5 .and. & ubound((pa4), 2) == 4 end + subroutine test5_max_ubound + ! 
Test maximum ubound value + integer(8), parameter :: I64_MAX = INT(z'7fffffffffffffff', kind=8) + integer, parameter :: a5(I64_MAX - 2 : I64_MAX) = [1, 2, 3] + logical, parameter :: test_uba5 = ubound(a5, 1, kind=8) == I64_MAX + end subroutine end diff --git a/flang/test/Semantics/reshape.f90 b/flang/test/Semantics/reshape.f90 index 2e9b5adf3ff0e..fb5e0023e2716 100644 --- a/flang/test/Semantics/reshape.f90 +++ b/flang/test/Semantics/reshape.f90 @@ -44,6 +44,11 @@ program reshaper type(dType), parameter :: array19(*) = [dType::dType(field=[1,2])] logical, parameter :: lVar = all(array19(:)%field(1) == [2]) + ! RESHAPE on array with maximum valid upper bound + integer(8), parameter :: I64_MAX = INT(z'7fffffffffffffff', kind=8) + integer, parameter :: array21(I64_MAX - 2 : I64_MAX) = [1, 2, 3] + integer, parameter :: array22(2) = RESHAPE(array21, [2]) + !ERROR: Size of 'shape=' argument must not be greater than 15 CALL ext_sub(RESHAPE([(n, n=1,20)], & [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])) From 3dc263b18c082c380c559e88f6d4cb6ce54a2e53 Mon Sep 17 00:00:00 2001 From: bjacob Date: Tue, 17 Oct 2023 10:47:33 -0400 Subject: [PATCH 343/720] Update documentation on x86 constraint codes (#68830) This updates the documentation on these inline asm constraint codes to match reality. Context: https://github.com/llvm/llvm-project/issues/68818#issuecomment-1758180020 Note: dropping also the `'o'` from the docs because I can't find any mention of it in X86ISelLowering.cpp. --- llvm/docs/LangRef.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index ee893d8e384b6..40eee1fa9fe4e 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -5282,7 +5282,6 @@ X86: - ``O``: An immediate integer between 0 and 127. - ``e``: An immediate 32-bit signed integer. - ``Z``: An immediate 32-bit unsigned integer. -- ``o``, ``v``: Treated the same as ``m``, at the moment. 
- ``q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit ``l`` integer register. On X86-32, this is the ``a``, ``b``, ``c``, and ``d`` registers, and on X86-64, it is all of the integer registers. @@ -5293,10 +5292,13 @@ X86: existed since i386, and can be accessed without the REX prefix. - ``f``: A 32, 64, or 80-bit '387 FPU stack pseudo-register. - ``y``: A 64-bit MMX register, if MMX is enabled. -- ``x``: If SSE is enabled: a 32 or 64-bit scalar operand, or 128-bit vector +- ``v``: If SSE is enabled: a 32 or 64-bit scalar operand, or 128-bit vector operand in a SSE register. If AVX is also enabled, can also be a 256-bit vector operand in an AVX register. If AVX-512 is also enabled, can also be a - 512-bit vector operand in an AVX512 register, Otherwise, an error. + 512-bit vector operand in an AVX512 register. Otherwise, an error. +- ``x``: The same as ``v``, except that when AVX-512 is enabled, the ``x`` code + only allocates into the first 16 AVX-512 registers, while the ``v`` code + allocates into any of the 32 AVX-512 registers. - ``Y``: The same as ``x``, if *SSE2* is enabled, otherwise an error. - ``A``: Special case: allocates EAX first, then EDX, for a single operand (in 32-bit mode, a 64-bit integer operand will get split into two registers). 
It From 4df46c39d6e24dd0fb8c72307882797e88d962e3 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 17 Oct 2023 14:49:49 +0000 Subject: [PATCH 344/720] [gn build] Port 088d272e8325 --- llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn index 00a3b599c893a..15c198c73f941 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn @@ -42,6 +42,7 @@ unittest("ADTTests") { "FunctionRefTest.cpp", "HashingTest.cpp", "IListBaseTest.cpp", + "IListIteratorBitsTest.cpp", "IListIteratorTest.cpp", "IListNodeBaseTest.cpp", "IListNodeTest.cpp", From 3d6e4160d52da60c39952abc8e6d2189de0b4e64 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Tue, 17 Oct 2023 22:56:25 +0800 Subject: [PATCH 345/720] [X86] Enable bfloat type support in inline assembly constraints (#68469) Similar to FP16 but we don't have native scalar instruction support, so limit it to vector types only. 
Fixes #68149 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 48 ++++++++++++++++--- .../X86/inline-asm-avx512f-x-constraint.ll | 13 ++++- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 66b6d8260b7c7..35778c7f9af3e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -56919,7 +56919,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v8f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + if (VConstraint) + return std::make_pair(0U, &X86::VR128XRegClass); + return std::make_pair(0U, &X86::VR128RegClass); + case MVT::v8bf16: + if (!Subtarget.hasBF16() || !Subtarget.hasVLX()) + break; + if (VConstraint) + return std::make_pair(0U, &X86::VR128XRegClass); + return std::make_pair(0U, &X86::VR128RegClass); case MVT::f128: case MVT::v16i8: case MVT::v8i16: @@ -56934,7 +56942,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v16f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + if (VConstraint) + return std::make_pair(0U, &X86::VR256XRegClass); + return std::make_pair(0U, &X86::VR256RegClass); + case MVT::v16bf16: + if (!Subtarget.hasBF16() || !Subtarget.hasVLX()) + break; + if (VConstraint) + return std::make_pair(0U, &X86::VR256XRegClass); + return std::make_pair(0U, &X86::VR256RegClass); case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: @@ -56949,7 +56965,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v32f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + if (VConstraint) + return std::make_pair(0U, &X86::VR512RegClass); + return std::make_pair(0U, &X86::VR512_0_15RegClass); + case MVT::v32bf16: + if (!Subtarget.hasBF16()) + break; + if (VConstraint) + return std::make_pair(0U, &X86::VR512RegClass); + return std::make_pair(0U, &X86::VR512_0_15RegClass); case 
MVT::v64i8: case MVT::v32i16: case MVT::v8f64: @@ -56992,7 +57016,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v8f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + return std::make_pair(X86::XMM0, &X86::VR128RegClass); + case MVT::v8bf16: + if (!Subtarget.hasBF16() || !Subtarget.hasVLX()) + break; + return std::make_pair(X86::XMM0, &X86::VR128RegClass); case MVT::f128: case MVT::v16i8: case MVT::v8i16: @@ -57005,7 +57033,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v16f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + return std::make_pair(X86::YMM0, &X86::VR256RegClass); + case MVT::v16bf16: + if (!Subtarget.hasBF16() || !Subtarget.hasVLX()) + break; + return std::make_pair(X86::YMM0, &X86::VR256RegClass); case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: @@ -57018,7 +57050,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v32f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass); + case MVT::v32bf16: + if (!Subtarget.hasBF16()) + break; + return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass); case MVT::v64i8: case MVT::v32i16: case MVT::v8f64: diff --git a/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll index fcea55c47cd3e..e153387d16e72 100644 --- a/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll +++ b/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll @@ -1,7 +1,7 @@ ; RUN: not llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -stop-after=finalize-isel > %t 2> %t.err ; RUN: FileCheck < %t %s ; RUN: FileCheck --check-prefix=CHECK-STDERR < %t.err %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 -stop-after=finalize-isel | FileCheck --check-prefixes=CHECK,FP16 %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=avx512bf16,avx512fp16 -stop-after=finalize-isel | FileCheck --check-prefixes=CHECK,FP16 %s ; CHECK-LABEL: name: mask_Yk_i8 ; CHECK: %[[REG1:.*]]:vr512_0_15 = COPY %1 @@ -24,3 +24,14 @@ entry: %0 = tail call <32 x half> asm "vaddph\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i8 %msk, <32 x half> %x, <32 x half> %y) ret <32 x half> %0 } + +; FP16-LABEL: name: mask_Yk_bf16 +; FP16: %[[REG1:.*]]:vr512_0_15 = COPY %1 +; FP16: %[[REG2:.*]]:vr512_0_15 = COPY %2 +; FP16: INLINEASM &"vaddph\09$3, $2, $0 {$1}", 0 /* attdialect */, {{.*}}, def %{{.*}}, {{.*}}, %{{.*}}, {{.*}}, %[[REG1]], {{.*}}, %[[REG2]], 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags +; CHECK-STDERR: couldn't allocate output register for constraint 'x' +define <32 x bfloat> @mask_Yk_bf16(i8 signext %msk, <32 x bfloat> %x, <32 x bfloat> %y) { +entry: + %0 = tail call <32 x bfloat> asm "vaddph\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i8 %msk, <32 x bfloat> %x, <32 x bfloat> %y) + ret <32 x bfloat> %0 +} From 4b8f23e93de56b7bfeffbf789ec5a75525dc5d88 Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Tue, 17 Oct 2023 16:02:36 +0100 Subject: [PATCH 346/720] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (#68908) The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, but since vnum is used to modify the base register via a mul and add, that restriction is not necessary. This patch removes that restriction. 
--- clang/include/clang/Basic/arm_sme.td | 10 ++++------ clang/lib/CodeGen/CGBuiltin.cpp | 15 +++++---------- clang/lib/CodeGen/CodeGenFunction.h | 1 - .../aarch64-sme-intrinsics/acle_sme_ldr.c | 16 ++++++++++++++++ .../aarch64-sme-intrinsics/acle_sme_str.c | 15 +++++++++++++++ .../Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp | 8 -------- 6 files changed, 40 insertions(+), 25 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index d014900d719c3..8d85327a86b1a 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -44,10 +44,9 @@ defm SVLD1_ZA32 : ZALoad<"za32", "i", "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0 defm SVLD1_ZA64 : ZALoad<"za64", "l", "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>]>; defm SVLD1_ZA128 : ZALoad<"za128", "q", "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>]>; -def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmQi", "", +def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmQl", "", [IsOverloadNone, IsStreamingCompatible, IsSharedZA], - MemEltTyDefault, "aarch64_sme_ldr", - [ImmCheck<2, ImmCheck0_15>]>; + MemEltTyDefault, "aarch64_sme_ldr">; def SVLDR_ZA : MInst<"svldr_za", "vmQ", "", [IsOverloadNone, IsStreamingCompatible, IsSharedZA], @@ -82,10 +81,9 @@ defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>]>; defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>]>; -def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vm%i", "", +def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vm%l", "", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], - MemEltTyDefault, "aarch64_sme_str", - [ImmCheck<2, ImmCheck0_15>]>; + MemEltTyDefault, "aarch64_sme_str">; def SVSTR_ZA : MInst<"svstr_za", "vm%", "", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], diff --git a/clang/lib/CodeGen/CGBuiltin.cpp 
b/clang/lib/CodeGen/CGBuiltin.cpp index 43ace3e11e610..f1c199e165fca 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9694,11 +9694,6 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } -Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { - llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false); - return Builder.CreateAdd(Base, CastOffset, "tileslice"); -} - Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9757,13 +9752,13 @@ Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, if (Ops.size() == 3) { Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - llvm::Value *MulVL = Builder.CreateMul( - CntsbCall, - Builder.getInt64(cast(Ops[2])->getZExtValue()), - "mulvl"); + + llvm::Value *VecNum = Ops[2]; + llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); - Ops[0] = EmitTileslice(Ops[0], Ops[2]); + Ops[0] = Builder.CreateAdd( + Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); Ops.erase(&Ops[2]); } Function *F = CGM.getIntrinsic(IntID, {}); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index d5336382a2b9c..6bc6d244bee20 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4280,7 +4280,6 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl &Ops, unsigned BuiltinID); - llvm::Value *EmitTileslice(llvm::Value *Offset, llvm::Value *Base); llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID); diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index 
acddc2ef50a3d..3f8bb6a8cdfeb 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -34,6 +34,22 @@ void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]]) // CHECK-NEXT: ret void +// void test_svldr_za(uint32_t slice_base, const void *ptr) { svldr_za(slice_base, ptr); } + +// CHECK-C-LABEL: @test_svldr_vnum_za_var( +// CHECK-CXX-LABEL: @_Z22test_svldr_vnum_za_varjPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[VNUM:%.*]] to i32 +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[TMP1]], [[SLICE_BASE:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[TILESLICE]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) { + svldr_vnum_za(slice_base, ptr, vnum); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c index 2728f9ac0cd12..94c95b6664a0a 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c @@ -38,3 +38,18 @@ void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) { void test_svstr_za(uint32_t slice_base, void *ptr) { svstr_za(slice_base, ptr); } + +// CHECK-C-LABEL: @test_svstr_vnum_za_var( +// CHECK-CXX-LABEL: @_Z22test_svstr_vnum_za_varjPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// 
CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[VNUM:%.*]] to i32 +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[TMP1]], [[SLICE_BASE:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[TILESLICE]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) { + svstr_vnum_za(slice_base, ptr, vnum); +} diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp index 7475fd53b80ba..1faa5638c801c 100644 --- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp +++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp @@ -143,11 +143,6 @@ void test_range_0_15(uint32_t slice, svbool_t pg, void *ptr) { // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(16, slice, pg, ptr, 1); - // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svldr_vnum_za,,,)(-1, ptr, 16); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svstr_vnum_za,,,)(-1, ptr, -1); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} SVE_ACLE_FUNC(svread_hor_za128, _s8, _m,)(svundef_s8(), pg, -1, slice); // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} @@ -171,9 +166,6 @@ void test_constant(uint64_t u64, svbool_t pg, void *ptr) { SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(u64, 0, pg, ptr, u64); // expected-error {{argument to 'svld1_hor_vnum_za8' must be a constant integer}} SVE_ACLE_FUNC(svst1_hor_vnum_za32,,,)(u64, 0, pg, ptr, u64); // expected-error {{argument to 'svst1_hor_vnum_za32' must be a constant integer}} - SVE_ACLE_FUNC(svldr_vnum_za,,,)(u64, ptr, u64); // expected-error {{argument to 'svldr_vnum_za' must be a constant integer}} - SVE_ACLE_FUNC(svstr_vnum_za,,,)(u64, ptr, u64); // expected-error {{argument to 'svstr_vnum_za' must 
be a constant integer}} - SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, u64, 0); // expected-error-re {{argument to 'svread_ver_za16{{.*}}_m' must be a constant integer}} SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(u64, 0, pg, svundef_s64()); // expected-error-re {{argument to 'svwrite_ver_za64{{.*}}_m' must be a constant integer}} } From dffd93b30b56557a4b98f8b68fec0aa6cc706deb Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Tue, 17 Oct 2023 08:20:38 -0700 Subject: [PATCH 347/720] [flang][runtime] Fix SAME_TYPE_AS()/EXTENDS_TYPE_OF() for CLASS(*) (#67727) Ensure that the f18Addendum flag is preserved in AllocatableApplyMold(), that raw().type is reinitialized in AllocatableDeallocatePolymorphic(), and that the implementations of SameTypeAs() and ExtendsTypeOf() handle unallocated unlimited polymorphic arguments correctly. --- flang/include/flang/Runtime/descriptor.h | 2 + flang/runtime/allocatable.cpp | 17 +--- flang/runtime/derived-api.cpp | 102 ++++++++++------------- flang/runtime/descriptor.cpp | 16 ++++ flang/runtime/pointer.cpp | 17 +--- flang/runtime/type-info.cpp | 10 ++- 6 files changed, 74 insertions(+), 90 deletions(-) diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h index a5747f98ff2bd..fa68d97769695 100644 --- a/flang/include/flang/Runtime/descriptor.h +++ b/flang/include/flang/Runtime/descriptor.h @@ -413,6 +413,8 @@ class Descriptor { const SubscriptValue *upper = nullptr, const SubscriptValue *stride = nullptr); + RT_API_ATTRS void ApplyMold(const Descriptor &, int rank); + RT_API_ATTRS void Check() const; void Dump(FILE * = stdout) const; diff --git a/flang/runtime/allocatable.cpp b/flang/runtime/allocatable.cpp index 4b9e438e8a109..409255aaa214d 100644 --- a/flang/runtime/allocatable.cpp +++ b/flang/runtime/allocatable.cpp @@ -130,17 +130,7 @@ void RTNAME(AllocatableApplyMold)( // 9.7.1.3 Return so the error can be emitted by 
AllocatableAllocate. return; } - descriptor = mold; - descriptor.set_base_addr(nullptr); - descriptor.raw().attribute = CFI_attribute_allocatable; - descriptor.raw().rank = rank; - if (auto *descAddendum{descriptor.Addendum()}) { - if (const auto *moldAddendum{mold.Addendum()}) { - if (const auto *derived{moldAddendum->derivedType()}) { - descAddendum->set_derivedType(derived); - } - } - } + descriptor.ApplyMold(mold, rank); } int RTNAME(AllocatableAllocate)(Descriptor &descriptor, bool hasStat, @@ -198,14 +188,15 @@ int RTNAME(AllocatableDeallocatePolymorphic)(Descriptor &descriptor, int stat{RTNAME(AllocatableDeallocate)( descriptor, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { - DescriptorAddendum *addendum{descriptor.Addendum()}; - if (addendum) { + if (DescriptorAddendum * addendum{descriptor.Addendum()}) { addendum->set_derivedType(derivedType); + descriptor.raw().type = derivedType ? CFI_type_struct : CFI_type_other; } else { // Unlimited polymorphic descriptors initialized with // AllocatableInitIntrinsic do not have an addendum. Make sure the // derivedType is null in that case. INTERNAL_CHECK(!derivedType); + descriptor.raw().type = CFI_type_other; } } return stat; diff --git a/flang/runtime/derived-api.cpp b/flang/runtime/derived-api.cpp index 66123030f98b5..39bf0521e73b1 100644 --- a/flang/runtime/derived-api.cpp +++ b/flang/runtime/derived-api.cpp @@ -101,73 +101,55 @@ static const typeInfo::DerivedType *GetDerivedType(const Descriptor &desc) { } bool RTNAME(SameTypeAs)(const Descriptor &a, const Descriptor &b) { - // Unlimited polymorphic with intrinsic dynamic type. - if (a.raw().type != CFI_type_struct && a.raw().type != CFI_type_other && - b.raw().type != CFI_type_struct && b.raw().type != CFI_type_other) - return a.raw().type == b.raw().type; - - const typeInfo::DerivedType *derivedTypeA{GetDerivedType(a)}; - const typeInfo::DerivedType *derivedTypeB{GetDerivedType(b)}; - - // No dynamic type in one or both descriptor. 
- if (derivedTypeA == nullptr || derivedTypeB == nullptr) { - return false; - } - - // Exact match of derived type. - if (derivedTypeA == derivedTypeB) { - return true; + auto aType{a.raw().type}; + auto bType{b.raw().type}; + if ((aType != CFI_type_struct && aType != CFI_type_other) || + (bType != CFI_type_struct && bType != CFI_type_other)) { + // If either type is intrinsic, they must match. + return aType == bType; + } else { + const typeInfo::DerivedType *derivedTypeA{GetDerivedType(a)}; + const typeInfo::DerivedType *derivedTypeB{GetDerivedType(b)}; + if (derivedTypeA == nullptr || derivedTypeB == nullptr) { + // Unallocated/disassociated CLASS(*) never matches. + return false; + } else if (derivedTypeA == derivedTypeB) { + // Exact match of derived type. + return true; + } else { + // Otherwise compare with the name. Note 16.29 kind type parameters are + // not considered in the test. + return CompareDerivedTypeNames( + derivedTypeA->name(), derivedTypeB->name()); + } } - // Otherwise compare with the name. Note 16.29 kind type parameters are not - // considered in the test. - return CompareDerivedTypeNames(derivedTypeA->name(), derivedTypeB->name()); } bool RTNAME(ExtendsTypeOf)(const Descriptor &a, const Descriptor &mold) { - if (a.raw().type != CFI_type_struct && a.raw().type != CFI_type_other && - mold.raw().type != CFI_type_struct && mold.raw().type != CFI_type_other) - return a.raw().type == mold.raw().type; - - const typeInfo::DerivedType *derivedTypeA{GetDerivedType(a)}; - const typeInfo::DerivedType *derivedTypeMold{GetDerivedType(mold)}; - - // If MOLD is unlimited polymorphic and is either a disassociated pointer or - // unallocated allocatable, the result is true. - // Unlimited polymorphic descriptors are initialized with a CFI_type_other - // type. 
- if (mold.type().raw() == CFI_type_other && - (mold.IsAllocatable() || mold.IsPointer()) && - derivedTypeMold == nullptr) { - return true; - } - - // If A is unlimited polymorphic and is either a disassociated pointer or - // unallocated allocatable, the result is false. - // Unlimited polymorphic descriptors are initialized with a CFI_type_other - // type. - if (a.type().raw() == CFI_type_other && - (a.IsAllocatable() || a.IsPointer()) && derivedTypeA == nullptr) { - return false; - } - - if (derivedTypeA == nullptr || derivedTypeMold == nullptr) { + auto aType{a.raw().type}; + auto moldType{mold.raw().type}; + if ((aType != CFI_type_struct && aType != CFI_type_other) || + (moldType != CFI_type_struct && moldType != CFI_type_other)) { + // If either type is intrinsic, they must match. + return aType == moldType; + } else if (const typeInfo::DerivedType * + derivedTypeMold{GetDerivedType(mold)}) { + // If A is unlimited polymorphic and is either a disassociated pointer or + // unallocated allocatable, the result is false. + // Otherwise if the dynamic type of A or MOLD is extensible, the result is + // true if and only if the dynamic type of A is an extension type of the + // dynamic type of MOLD. + for (const typeInfo::DerivedType *derivedTypeA{GetDerivedType(a)}; + derivedTypeA; derivedTypeA = derivedTypeA->GetParentType()) { + if (CompareDerivedType(derivedTypeA, derivedTypeMold)) { + return true; + } + } return false; - } - - // Otherwise if the dynamic type of A or MOLD is extensible, the result is - // true if and only if the dynamic type of A is an extension type of the - // dynamic type of MOLD. - if (CompareDerivedType(derivedTypeA, derivedTypeMold)) { + } else { + // MOLD is unlimited polymorphic and unallocated/disassociated. 
return true; } - const typeInfo::DerivedType *parent{derivedTypeA->GetParentType()}; - while (parent) { - if (CompareDerivedType(parent, derivedTypeMold)) { - return true; - } - parent = parent->GetParentType(); - } - return false; } void RTNAME(DestroyWithoutFinalization)(const Descriptor &descriptor) { diff --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp index 8dd3f215279ba..34ca33a6a8e30 100644 --- a/flang/runtime/descriptor.cpp +++ b/flang/runtime/descriptor.cpp @@ -243,6 +243,22 @@ RT_API_ATTRS bool Descriptor::EstablishPointerSection(const Descriptor &source, return CFI_section(&raw_, &source.raw_, lower, upper, stride) == CFI_SUCCESS; } +RT_API_ATTRS void Descriptor::ApplyMold(const Descriptor &mold, int rank) { + raw_.elem_len = mold.raw_.elem_len; + raw_.rank = rank; + raw_.type = mold.raw_.type; + for (int j{0}; j < rank && j < mold.raw_.rank; ++j) { + GetDimension(j) = mold.GetDimension(j); + } + if (auto *addendum{Addendum()}) { + if (auto *moldAddendum{mold.Addendum()}) { + *addendum = *moldAddendum; + } else { + INTERNAL_CHECK(!addendum->derivedType()); + } + } +} + RT_API_ATTRS void Descriptor::Check() const { // TODO } diff --git a/flang/runtime/pointer.cpp b/flang/runtime/pointer.cpp index 0320468ffdc79..b0003add7b358 100644 --- a/flang/runtime/pointer.cpp +++ b/flang/runtime/pointer.cpp @@ -56,17 +56,7 @@ void RTNAME(PointerSetDerivedLength)( void RTNAME(PointerApplyMold)( Descriptor &pointer, const Descriptor &mold, int rank) { - pointer = mold; - pointer.set_base_addr(nullptr); - pointer.raw().attribute = CFI_attribute_pointer; - pointer.raw().rank = rank; - if (auto *pointerAddendum{pointer.Addendum()}) { - if (const auto *moldAddendum{mold.Addendum()}) { - if (const auto *derived{moldAddendum->derivedType()}) { - pointerAddendum->set_derivedType(derived); - } - } - } + pointer.ApplyMold(mold, rank); } void RTNAME(PointerAssociateScalar)(Descriptor &pointer, void *target) { @@ -183,14 +173,15 @@ int 
RTNAME(PointerDeallocatePolymorphic)(Descriptor &pointer, int stat{RTNAME(PointerDeallocate)( pointer, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { - DescriptorAddendum *addendum{pointer.Addendum()}; - if (addendum) { + if (DescriptorAddendum * addendum{pointer.Addendum()}) { addendum->set_derivedType(derivedType); + pointer.raw().type = derivedType ? CFI_type_struct : CFI_type_other; } else { // Unlimited polymorphic descriptors initialized with // PointerNullifyIntrinsic do not have an addendum. Make sure the // derivedType is null in that case. INTERNAL_CHECK(!derivedType); + pointer.raw().type = CFI_type_other; } } return stat; diff --git a/flang/runtime/type-info.cpp b/flang/runtime/type-info.cpp index baf446e0c79d3..b30a2c832a138 100644 --- a/flang/runtime/type-info.cpp +++ b/flang/runtime/type-info.cpp @@ -251,10 +251,12 @@ FILE *DerivedType::Dump(FILE *f) const { std::fprintf( f, "\n special descriptor (byteSize 0x%zx): ", special_.byteSize); specialDesc.Dump(f); - std::size_t specials{specialDesc.Elements()}; - for (std::size_t j{0}; j < specials; ++j) { - std::fprintf(f, " [%3zd] ", j); - specialDesc.ZeroBasedIndexedElement(j)->Dump(f); + if (specialDesc.IsAllocated()) { + std::size_t specials{specialDesc.Elements()}; + for (std::size_t j{0}; j < specials; ++j) { + std::fprintf(f, " [%3zd] ", j); + specialDesc.ZeroBasedIndexedElement(j)->Dump(f); + } } return f; } From a559de0c2fdd04f38fa91821b4b8a50b8233a6ff Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 17 Oct 2023 15:12:01 +0100 Subject: [PATCH 348/720] [AMDGPU] Simplify definition of SIbuffer_atomic_*. NFC. 
--- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 74 ++++++++++----------------- 1 file changed, 28 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index b4adb444600c4..b0b91d8317188 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -158,36 +158,18 @@ def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16", SDTBufferStore, [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; -class SDBufferAtomic : SDNode , // rsrc - SDTCisVT<3, i32>, // vindex(VGPR) - SDTCisVT<4, i32>, // voffset(VGPR) - SDTCisVT<5, i32>, // soffset(SGPR) - SDTCisVT<6, i32>, // offset(imm) - SDTCisVT<7, i32>, // cachepolicy(imm) - SDTCisVT<8, i1>]>, // idxen(imm) - [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] ->; - -def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">; -def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">; -def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">; -def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">; -def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">; -def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">; -def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">; -def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">; -def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">; -def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">; -def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">; -def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">; -def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">; -def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">; -def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">; -def 
SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">; - -multiclass SDBufferAtomicNoRet { +multiclass SDBufferAtomic { + def "" : SDNode , // rsrc + SDTCisVT<3, i32>, // vindex(VGPR) + SDTCisVT<4, i32>, // voffset(VGPR) + SDTCisVT<5, i32>, // soffset(SGPR) + SDTCisVT<6, i32>, // offset(imm) + SDTCisVT<7, i32>, // cachepolicy(imm) + SDTCisVT<8, i1>]>, // idxen(imm) + [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] + >; def "_noret" : PatFrag< (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, node:$idxen), @@ -198,22 +180,22 @@ multiclass SDBufferAtomicNoRet { } } -defm SIbuffer_atomic_swap : SDBufferAtomicNoRet; -defm SIbuffer_atomic_add : SDBufferAtomicNoRet; -defm SIbuffer_atomic_sub : SDBufferAtomicNoRet; -defm SIbuffer_atomic_smin : SDBufferAtomicNoRet; -defm SIbuffer_atomic_umin : SDBufferAtomicNoRet; -defm SIbuffer_atomic_smax : SDBufferAtomicNoRet; -defm SIbuffer_atomic_umax : SDBufferAtomicNoRet; -defm SIbuffer_atomic_and : SDBufferAtomicNoRet; -defm SIbuffer_atomic_or : SDBufferAtomicNoRet; -defm SIbuffer_atomic_xor : SDBufferAtomicNoRet; -defm SIbuffer_atomic_inc : SDBufferAtomicNoRet; -defm SIbuffer_atomic_dec : SDBufferAtomicNoRet; -defm SIbuffer_atomic_csub : SDBufferAtomicNoRet; -defm SIbuffer_atomic_fadd : SDBufferAtomicNoRet; -defm SIbuffer_atomic_fmin : SDBufferAtomicNoRet; -defm SIbuffer_atomic_fmax : SDBufferAtomicNoRet; +defm SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">; +defm SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">; +defm SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">; +defm SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">; +defm SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">; +defm SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">; +defm SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">; 
+defm SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">; +defm SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">; +defm SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">; +defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">; +defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">; +defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">; +defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">; +defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">; +defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">; def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", SDTypeProfile<1, 9, From 7f3435575404cc811c976410d9b01c7c10fd03e2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 17 Oct 2023 08:29:50 -0700 Subject: [PATCH 349/720] [RISCV] Use separate CCValAssign for both parts of f64 with ilp32. (#69129) Mark any registers as CustomReg and any stack slot as CustomMem. This allows us to more directly emit the register or memory access for the high part. Previously we needed a memory access if the low register was X17 and we assumed the stack offset was 0. If the low part wasn't X17, we assumed the high register was the next register after the low register. This is another part of supporting FP arguments with GISel. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 96 +++++++++++++-------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 666998fecd6e1..e8f001e491cdc 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16452,9 +16452,16 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, return false; } LocVT = MVT::i32; - if (!State.AllocateReg(ArgGPRs)) - State.AllocateStack(4, Align(4)); - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + Register HiReg = State.AllocateReg(ArgGPRs); + if (HiReg) { + State.addLoc( + CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo)); + } else { + unsigned StackOffset = State.AllocateStack(4, Align(4)); + State.addLoc( + CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); + } return false; } @@ -16763,7 +16770,9 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, } static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, - const CCValAssign &VA, const SDLoc &DL) { + const CCValAssign &VA, + const CCValAssign &HiVA, + const SDLoc &DL) { assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && "Unexpected VA"); MachineFunction &MF = DAG.getMachineFunction(); @@ -16776,16 +16785,17 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, RegInfo.addLiveIn(VA.getLocReg(), LoVReg); SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); SDValue Hi; - if (VA.getLocReg() == RISCV::X17) { + if (HiVA.isMemLoc()) { // Second half of f64 is passed on the stack. 
- int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true); + int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(), + /*IsImmutable=*/true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, MachinePointerInfo::getFixedStack(MF, FI)); } else { // Second half of f64 is passed in another GPR. Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); - RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); + RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg); Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); } return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); @@ -17028,15 +17038,16 @@ SDValue RISCVTargetLowering::LowerFormalArguments( CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { CCValAssign &VA = ArgLocs[i]; SDValue ArgValue; // Passing f64 on RV32D with a soft float ABI must be handled as a special // case. - if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) - ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); - else if (VA.isRegLoc()) - ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this); + if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { + assert(VA.needsCustom()); + ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL); + } else if (VA.isRegLoc()) + ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this); else ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); @@ -17048,12 +17059,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments( // stores are relative to that. 
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo())); - unsigned ArgIndex = Ins[i].OrigArgIndex; - unsigned ArgPartOffset = Ins[i].PartOffset; + unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; + unsigned ArgPartOffset = Ins[InsIdx].PartOffset; assert(VA.getValVT().isVector() || ArgPartOffset == 0); - while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { + while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { CCValAssign &PartVA = ArgLocs[i + 1]; - unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset; + unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); if (PartVA.getValVT().isScalableVector()) Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); @@ -17061,6 +17072,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, MachinePointerInfo())); ++i; + ++InsIdx; } continue; } @@ -17276,14 +17288,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVector, 8> RegsToPass; SmallVector MemOpChains; SDValue StackPtr; - for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { + for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; + ++i, ++OutIdx) { CCValAssign &VA = ArgLocs[i]; - SDValue ArgValue = OutVals[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; + SDValue ArgValue = OutVals[OutIdx]; + ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; // Handle passing f64 on RV32D with a soft float ABI as a special case. 
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { assert(VA.isRegLoc() && "Expected register VA assignment"); + assert(VA.needsCustom()); SDValue SplitF64 = DAG.getNode( RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); SDValue Lo = SplitF64.getValue(0); @@ -17292,18 +17306,22 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, Register RegLo = VA.getLocReg(); RegsToPass.push_back(std::make_pair(RegLo, Lo)); - if (RegLo == RISCV::X17) { + // Get the CCValAssign for the Hi part. + CCValAssign &HiVA = ArgLocs[++i]; + + if (HiVA.isMemLoc()) { // Second half of f64 is passed on the stack. - // Work out the address of the stack slot. if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); + SDValue Address = + DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL)); // Emit the store. MemOpChains.push_back( - DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); + DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo())); } else { // Second half of f64 is passed in another GPR. - assert(RegLo < RISCV::X31 && "Invalid register pair"); - Register RegHigh = RegLo + 1; + Register RegHigh = HiVA.getLocReg(); RegsToPass.push_back(std::make_pair(RegHigh, Hi)); } continue; @@ -17314,7 +17332,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. Align StackAlign = - std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), + std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG), getPrefTypeAlign(ArgValue.getValueType(), DAG)); TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); // If the original argument was split (e.g. 
i128), we need @@ -17322,16 +17340,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // Vectors may be partly split to registers and partly to the stack, in // which case the base address is partly offset and subsequent stores are // relative to that. - unsigned ArgIndex = Outs[i].OrigArgIndex; - unsigned ArgPartOffset = Outs[i].PartOffset; + unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; + unsigned ArgPartOffset = Outs[OutIdx].PartOffset; assert(VA.getValVT().isVector() || ArgPartOffset == 0); // Calculate the total size to store. We don't have access to what we're // actually storing other than performing the loop and collecting the // info. SmallVector> Parts; - while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { - SDValue PartValue = OutVals[i + 1]; - unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; + while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { + SDValue PartValue = OutVals[OutIdx + 1]; + unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); EVT PartVT = PartValue.getValueType(); if (PartVT.isScalableVector()) @@ -17340,6 +17358,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); Parts.push_back(std::make_pair(PartValue, Offset)); ++i; + ++OutIdx; } SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); int FI = cast(SpillSlot)->getIndex(); @@ -17481,7 +17500,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV); // Copy all of the result registers out of their specified physreg. 
- for (auto &VA : RVLocs) { + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + auto &VA = RVLocs[i]; // Copy the value out SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); @@ -17490,9 +17510,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, Glue = RetValue.getValue(2); if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { - assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); - SDValue RetValue2 = - DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); + assert(VA.needsCustom()); + SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(), + MVT::i32, Glue); Chain = RetValue2.getValue(1); Glue = RetValue2.getValue(2); RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, @@ -17555,21 +17575,21 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector RetOps(1, Chain); // Copy the result values into the output registers. - for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { - SDValue Val = OutVals[i]; + for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { + SDValue Val = OutVals[OutIdx]; CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { // Handle returning f64 on RV32D with a soft float ABI. 
assert(VA.isRegLoc() && "Expected return via registers"); + assert(VA.needsCustom()); SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), Val); SDValue Lo = SplitF64.getValue(0); SDValue Hi = SplitF64.getValue(1); Register RegLo = VA.getLocReg(); - assert(RegLo < RISCV::X31 && "Invalid register pair"); - Register RegHi = RegLo + 1; + Register RegHi = RVLocs[++i].getLocReg(); if (STI.isRegisterReservedByUser(RegLo) || STI.isRegisterReservedByUser(RegHi)) From 2f329d88bc2e6e6fc1d79a723bf150df49e04684 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 17 Oct 2023 15:56:13 +0100 Subject: [PATCH 350/720] [DAG] foldConstantFPMath - accept ArrayRef Ops instead of explicit N1/N2 ops First step towards adding unary/ternary fp ops handling, and not just binops --- llvm/include/llvm/CodeGen/SelectionDAG.h | 6 +++--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 10 ++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 2c629f3f96a0c..e867448b9d551 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1919,10 +1919,10 @@ class SelectionDAG { SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef Ops); - /// Fold floating-point operations with 2 operands when both operands are - /// constants and/or undefined. + /// Fold floating-point operations when all operands are constants and/or + /// undefined. SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue N1, SDValue N2); + ArrayRef Ops); /// Constant fold a setcc to true or false. 
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3f06d0bd4eaa1..01da5c0ec49ee 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6236,7 +6236,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, // Handle binops special cases. if (NumOps == 2) { - if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops[0], Ops[1])) + if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops)) return CFP; if (auto *C1 = dyn_cast(Ops[0])) { @@ -6429,11 +6429,17 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, - EVT VT, SDValue N1, SDValue N2) { + EVT VT, ArrayRef Ops) { + // TODO: Add support for unary/ternary fp opcodes. + if (Ops.size() != 2) + return SDValue(); + // TODO: We don't do any constant folding for strict FP opcodes here, but we // should. That will require dealing with a potentially non-default // rounding mode, checking the "opStatus" return value from the APFloat // math calculations, and possibly other variations. 
+ SDValue N1 = Ops[0]; + SDValue N2 = Ops[1]; ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false); ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false); if (N1CFP && N2CFP) { From 3162cf0430210cfa7a992cecf9338b965bf4362e Mon Sep 17 00:00:00 2001 From: Jan Patrick Lehr Date: Tue, 17 Oct 2023 17:34:04 +0200 Subject: [PATCH 351/720] [Github][OpenMP] Adding rule for OpenMP label (#65331) This adds initial labelling for OpenMP (clang, libomp, libomptarget) --- .github/new-prs-labeler.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index ae658f848ecfb..e4bc53e60066e 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -794,3 +794,35 @@ llvm:binary-utilities: - llvm/tools/llvm-size/** - llvm/tools/llvm-strings/** - llvm/tools/llvm-symbolizer/** + +clang:openmp: + - clang/include/clang/Basic/OpenMP* + - clang/include/clang/AST/OpenMPClause.h + - clang/include/clang/AST/DeclOpenMP.h + - clang/include/clang/AST/ExprOpenMP.h + - clang/include/clang/AST/StmtOpenMP.h + - clang/lib/AST/DeclOpenMP.cpp + - clang/lib/AST/OpenMPClause.cpp + - clang/lib/AST/StmtOpenMP.cpp + - clang/lib/Headers/openmp_wrappers/** + - clang/lib/Parse/ParseOpenMP.cpp + - clang/lib/Basic/OpenMPKinds.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h + - clang/lib/CodeGen/CgStmtOpenMP.cpp + - clang/lib/CodeGen/CGOpenMP* + - clang/lib/Sema/SemaOpenMP.cpp + - clang/test/OpenMP/** + - clang/test/AST/ast-dump-openmp-* + - llvm/lib/Frontend/OpenMP/** + - llvm/lib/Transforms/IPO/OpenMPOpt.cpp + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h + - llvm/unittests/Frontend/OpenMP* + - llvm/test/Transforms/OpenMP/** + +openmp:libomp: + - any: ['openmp/**', '!openmp/libomptarget/**'] + +openmp:libomptarget: + - any: ['openmp/**', '!openmp/runtime/**'] From 
abd0d5d2626022d835c784b1fed557caf90e793f Mon Sep 17 00:00:00 2001 From: Vladislav Dzhidzhoev Date: Mon, 14 Aug 2023 11:16:04 +0200 Subject: [PATCH 352/720] Reland: [AArch64][GlobalISel] Adopt dup(load) -> LD1R patterns from SelectionDAG This relands the fb8f59156f0f208f6192ed808fc223eda6c0e7ec and makes isAArch64FrameOffsetLegal function recognize LD1R instructions. Original PR: https://github.com/llvm/llvm-project/pull/66914 PR of the fix: https://github.com/llvm/llvm-project/pull/69003 --- llvm/lib/Target/AArch64/AArch64InstrGISel.td | 17 +++++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 8 ++ .../AArch64/arm64-indexed-vector-ldst.ll | 73 +++++++++++++------ llvm/test/CodeGen/AArch64/arm64-ld1.ll | 29 +++++++- llvm/test/CodeGen/AArch64/arm64-st1.ll | 2 +- 5 files changed, 103 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index c6ff7bea4bd2c..27338bd243933 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -511,3 +511,20 @@ let AddedComplexity = 19 in { defm : VecROStoreLane64_0Pat; defm : VecROStoreLane64_0Pat; } + +def : Pat<(v8i8 (AArch64dup (i8 (load (am_indexed8 GPR64sp:$Rn))))), + (LD1Rv8b GPR64sp:$Rn)>; +def : Pat<(v16i8 (AArch64dup (i8 (load GPR64sp:$Rn)))), + (LD1Rv16b GPR64sp:$Rn)>; +def : Pat<(v4i16 (AArch64dup (i16 (load GPR64sp:$Rn)))), + (LD1Rv4h GPR64sp:$Rn)>; +def : Pat<(v8i16 (AArch64dup (i16 (load GPR64sp:$Rn)))), + (LD1Rv8h GPR64sp:$Rn)>; +def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv2s GPR64sp:$Rn)>; +def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv4s GPR64sp:$Rn)>; +def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv2d GPR64sp:$Rn)>; +def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv1d GPR64sp:$Rn)>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 
8f0e272a6fac7..05c79b610cb36 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5584,6 +5584,14 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, switch (MI.getOpcode()) { default: break; + case AArch64::LD1Rv1d: + case AArch64::LD1Rv2s: + case AArch64::LD1Rv2d: + case AArch64::LD1Rv4h: + case AArch64::LD1Rv4s: + case AArch64::LD1Rv8b: + case AArch64::LD1Rv8h: + case AArch64::LD1Rv16b: case AArch64::LD1Twov2d: case AArch64::LD1Threev2d: case AArch64::LD1Fourv2d: diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 1b9583464edea..2cab4932def07 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefixes=CHECK,SDAG -; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-GISEL +; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=arm64-apple-ios7.0 -o - %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GISEL ; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for test_v8i8_pre_load ; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for test_v8i8_post_load @@ -620,9 +620,6 @@ ; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for load_single_extract_variable_index_i8 ; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for load_single_extract_variable_index_i16 ; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for load_single_extract_variable_index_i32 -; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for load_single_extract_variable_index_v3i32_small_align -; CHECK-GISEL-NOT: warning: Instruction 
selection used fallback path for load_single_extract_variable_index_v3i32_default_align -; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for load_single_extract_valid_const_index_v3i32 ; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for load_single_extract_variable_index_masked_i32 ; CHECK-GISEL-NOT: warning: Instruction selection used fallback path for load_single_extract_variable_index_masked2_i32 @@ -13786,11 +13783,18 @@ define ptr @test_v1f64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x declare void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, ptr) define <16 x i8> @test_v16i8_post_imm_ld1r(ptr %bar, ptr %ptr) { -; CHECK-LABEL: test_v16i8_post_imm_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.16b { v0 }, [x0], #1 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_imm_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.16b { v0 }, [x0], #1 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1r: +; CHECK-GISEL: ; %bb.0: +; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0] +; CHECK-GISEL-NEXT: add x8, x0, #1 +; CHECK-GISEL-NEXT: str x8, [x1] +; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1 @@ -13814,11 +13818,18 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(ptr %bar, ptr %ptr) { } define <16 x i8> @test_v16i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v16i8_post_reg_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.16b { v0 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v16i8_post_reg_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.16b { v0 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1r: +; CHECK-GISEL: ; %bb.0: +; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0] +; CHECK-GISEL-NEXT: add x8, x0, x2 +; 
CHECK-GISEL-NEXT: str x8, [x1] +; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1 @@ -13842,11 +13853,18 @@ define <16 x i8> @test_v16i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { } define <8 x i8> @test_v8i8_post_imm_ld1r(ptr %bar, ptr %ptr) { -; CHECK-LABEL: test_v8i8_post_imm_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.8b { v0 }, [x0], #1 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_imm_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.8b { v0 }, [x0], #1 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1r: +; CHECK-GISEL: ; %bb.0: +; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0] +; CHECK-GISEL-NEXT: add x8, x0, #1 +; CHECK-GISEL-NEXT: str x8, [x1] +; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <8 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1 @@ -13862,11 +13880,18 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(ptr %bar, ptr %ptr) { } define <8 x i8> @test_v8i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { -; CHECK-LABEL: test_v8i8_post_reg_ld1r: -; CHECK: ; %bb.0: -; CHECK-NEXT: ld1r.8b { v0 }, [x0], x2 -; CHECK-NEXT: str x0, [x1] -; CHECK-NEXT: ret +; SDAG-LABEL: test_v8i8_post_reg_ld1r: +; SDAG: ; %bb.0: +; SDAG-NEXT: ld1r.8b { v0 }, [x0], x2 +; SDAG-NEXT: str x0, [x1] +; SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1r: +; CHECK-GISEL: ; %bb.0: +; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0] +; CHECK-GISEL-NEXT: add x8, x0, x2 +; CHECK-GISEL-NEXT: str x8, [x1] +; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <8 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll index 96468b2cfa8ac..54b96520dce41 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll +++ 
b/llvm/test/CodeGen/AArch64/arm64-ld1.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -global-isel=1 -global-isel-abort=2 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -global-isel=1 -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } @@ -1712,3 +1712,30 @@ define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(ptr %addr) { %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %addr) ret %struct.__neon_float64x2x4_t %val } + +define <8 x i8> @dup_ld1_from_stack(ptr %__ret) { +; CHECK-SD-LABEL: dup_ld1_from_stack: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: add x8, sp, #15 +; CHECK-SD-NEXT: ld1r.8b { v0 }, [x8] +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: dup_ld1_from_stack: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w29, -16 +; CHECK-GI-NEXT: add x8, sp, #15 +; CHECK-GI-NEXT: ld1r.8b { v0 }, [x8] +; CHECK-GI-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret +entry: + %item = alloca i8, align 1 + %0 = load i8, ptr %item, align 1 + %1 = insertelement <8 x i8> poison, i8 %0, i32 0 + %lane = shufflevector <8 x i8> %1, <8 x i8> %1, <8 x i32> zeroinitializer + ret <8 x i8> %lane +} diff --git a/llvm/test/CodeGen/AArch64/arm64-st1.ll b/llvm/test/CodeGen/AArch64/arm64-st1.ll index 121ca69bee21d..6f87c66c87345 100644 --- a/llvm/test/CodeGen/AArch64/arm64-st1.ll +++ b/llvm/test/CodeGen/AArch64/arm64-st1.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -global-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s ; The instruction latencies of Exynos-M3 trigger the transform we see under the Exynos check. ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m3 | FileCheck --check-prefix=EXYNOS %s From 20af0e5e8d5692327753286ac869ff1c347d819b Mon Sep 17 00:00:00 2001 From: AdityaK <1894981+hiraditya@users.noreply.github.com> Date: Tue, 17 Oct 2023 08:52:55 -0700 Subject: [PATCH 353/720] Enable v for RISCV64 Android (#69261) Android has already enabled V by default for aosp: https://android-review.googlesource.com/c/platform/build/soong/+/2752805 four weeks back. 
--- clang/lib/Driver/ToolChains/Arch/RISCV.cpp | 4 ++-- clang/test/Driver/riscv-features.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index bb097356d0c12..a05f4b7ea64b4 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -309,7 +309,7 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args, return "rv32imafdc"; else if (MABI.starts_with_insensitive("lp64")) { if (Triple.isAndroid()) - return "rv64imafdc_zba_zbb_zbs"; + return "rv64imafdcv_zba_zbb_zbs"; return "rv64imafdc"; } @@ -329,7 +329,7 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args, if (Triple.getOS() == llvm::Triple::UnknownOS) return "rv64imac"; else if (Triple.isAndroid()) - return "rv64imafdc_zba_zbb_zbs"; + return "rv64imafdcv_zba_zbb_zbs"; else return "rv64imafdc"; } diff --git a/clang/test/Driver/riscv-features.c b/clang/test/Driver/riscv-features.c index 0039c230ec476..851a7c0507eb3 100644 --- a/clang/test/Driver/riscv-features.c +++ b/clang/test/Driver/riscv-features.c @@ -10,6 +10,7 @@ // RUN: %clang --target=riscv32-unknown-elf -### %s -mrelax 2>&1 | FileCheck %s -check-prefix=RELAX // RUN: %clang --target=riscv32-unknown-elf -### %s -mno-relax 2>&1 | FileCheck %s -check-prefix=NO-RELAX +// ANDROID: "-target-feature" "+v" // ANDROID: "-target-feature" "+zba" // ANDROID: "-target-feature" "+zbb" // ANDROID: "-target-feature" "+zbs" From 7b1e6851b65a4776e52602c2987b9861fbdc1170 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 17 Oct 2023 16:03:26 +0000 Subject: [PATCH 354/720] [hwasan] Exclude bcmp interceptor test from Android This fixes a buildbot breakage (e.g., https://lab.llvm.org/buildbot/#/builders/77/builds/31422/steps/21/logs/stdio) that was caused by the introduction of this test (https://github.com/llvm/llvm-project/commit/ff1329e29709477472a93e9ce975f166f75999a3). 
Build error from buildbot: /var/lib/buildbot/sanitizer-buildbot6/sanitizer-x86_64-linux-android/build/llvm-project/compiler-rt/test/hwasan/TestCases/bcmp.cpp:18:10: error: use of undeclared identifier 'bcmp' 18 | return bcmp(p, a, size); --- compiler-rt/test/hwasan/TestCases/bcmp.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/test/hwasan/TestCases/bcmp.cpp b/compiler-rt/test/hwasan/TestCases/bcmp.cpp index 3dee4b8490efc..a83147b0f3205 100644 --- a/compiler-rt/test/hwasan/TestCases/bcmp.cpp +++ b/compiler-rt/test/hwasan/TestCases/bcmp.cpp @@ -2,6 +2,7 @@ // RUN: %clangxx_hwasan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s // RUN: %clangxx_hwasan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s // RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s +// REQUIRES: !android #include #include From be57381a4a08b0b6a89d5b5fdec0880b202e99f4 Mon Sep 17 00:00:00 2001 From: Dhruv Chawla Date: Tue, 17 Oct 2023 21:40:18 +0530 Subject: [PATCH 355/720] [InstCombine] Create a class to lazily track computed known bits (#66611) This patch adds a new class "WithCache" which stores a pointer to any type passable to computeKnownBits along with KnownBits information which is computed on-demand when getKnownBits() is called. This allows reusing the known bits information when it is passed as an argument to multiple functions. It also changes a few functions to accept WithCache(s) so that known bits information computed in some callees can be propagated to others from the top level visitAddSub caller. 
This gives a speedup of 0.14%: https://llvm-compile-time-tracker.com/compare.php?from=499d41cef2e7bbb65804f6a815b9fa8b27efce0f&to=fbea87f1f1e6d5552e2bc309f8e201a3af6d28ec&stat=instructions:u --- llvm/include/llvm/Analysis/ValueTracking.h | 19 ++++- llvm/include/llvm/Analysis/WithCache.h | 71 ++++++++++++++++++ .../Transforms/InstCombine/InstCombiner.h | 13 ++-- llvm/lib/Analysis/ValueTracking.cpp | 75 +++++++++---------- .../InstCombine/InstCombineAddSub.cpp | 8 +- .../InstCombine/InstCombineInternal.h | 6 +- 6 files changed, 140 insertions(+), 52 deletions(-) create mode 100644 llvm/include/llvm/Analysis/WithCache.h diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 25272e0581c93..0e02d0d5b4865 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/SimplifyQuery.h" +#include "llvm/Analysis/WithCache.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/FMF.h" @@ -90,6 +91,12 @@ KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, const DominatorTree *DT = nullptr, bool UseInstrInfo = true); +KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, + unsigned Depth, const SimplifyQuery &Q); + +KnownBits computeKnownBits(const Value *V, unsigned Depth, + const SimplifyQuery &Q); + /// Compute known bits from the range metadata. /// \p KnownZero the set of bits that are known to be zero /// \p KnownOne the set of bits that are known to be one @@ -107,7 +114,8 @@ KnownBits analyzeKnownBitsFromAndXorOr( bool UseInstrInfo = true); /// Return true if LHS and RHS have no common bits set. 
-bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, +bool haveNoCommonBitsSet(const WithCache &LHSCache, + const WithCache &RHSCache, const SimplifyQuery &SQ); /// Return true if the given value is known to have exactly one bit set when @@ -847,9 +855,12 @@ OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const SimplifyQuery &SQ); OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS, const SimplifyQuery &SQ); -OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, const Value *RHS, - const SimplifyQuery &SQ); -OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, +OverflowResult +computeOverflowForUnsignedAdd(const WithCache &LHS, + const WithCache &RHS, + const SimplifyQuery &SQ); +OverflowResult computeOverflowForSignedAdd(const WithCache &LHS, + const WithCache &RHS, const SimplifyQuery &SQ); /// This version also leverages the sign bit of Add if known. OverflowResult computeOverflowForSignedAdd(const AddOperator *Add, diff --git a/llvm/include/llvm/Analysis/WithCache.h b/llvm/include/llvm/Analysis/WithCache.h new file mode 100644 index 0000000000000..8065c45738f84 --- /dev/null +++ b/llvm/include/llvm/Analysis/WithCache.h @@ -0,0 +1,71 @@ +//===- llvm/Analysis/WithCache.h - KnownBits cache for pointers -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Store a pointer to any type along with the KnownBits information for it +// that is computed lazily (if required). 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_WITHCACHE_H +#define LLVM_ANALYSIS_WITHCACHE_H + +#include "llvm/IR/Value.h" +#include "llvm/Support/KnownBits.h" +#include + +namespace llvm { +struct SimplifyQuery; +KnownBits computeKnownBits(const Value *V, unsigned Depth, + const SimplifyQuery &Q); + +template class WithCache { + static_assert(std::is_pointer_v, "WithCache requires a pointer type!"); + + using UnderlyingType = std::remove_pointer_t; + constexpr static bool IsConst = std::is_const_v; + + template + using conditionally_const_t = std::conditional_t; + + using PointerType = conditionally_const_t; + using ReferenceType = conditionally_const_t; + + // Store the presence of the KnownBits information in one of the bits of + // Pointer. + // true -> present + // false -> absent + mutable PointerIntPair Pointer; + mutable KnownBits Known; + + void calculateKnownBits(const SimplifyQuery &Q) const { + Known = computeKnownBits(Pointer.getPointer(), 0, Q); + Pointer.setInt(true); + } + +public: + WithCache(PointerType Pointer) : Pointer(Pointer, false) {} + WithCache(PointerType Pointer, const KnownBits &Known) + : Pointer(Pointer, true), Known(Known) {} + + [[nodiscard]] PointerType getValue() const { return Pointer.getPointer(); } + + [[nodiscard]] const KnownBits &getKnownBits(const SimplifyQuery &Q) const { + if (!hasKnownBits()) + calculateKnownBits(Q); + return Known; + } + + [[nodiscard]] bool hasKnownBits() const { return Pointer.getInt(); } + + operator PointerType() const { return Pointer.getPointer(); } + PointerType operator->() const { return Pointer.getPointer(); } + ReferenceType operator*() const { return *Pointer.getPointer(); } +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index dcfcc8f41dd58..f8b3874267ded 100644 --- 
a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -510,15 +510,18 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { SQ.getWithInstruction(CxtI)); } - OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, - const Value *RHS, - const Instruction *CxtI) const { + OverflowResult + computeOverflowForUnsignedAdd(const WithCache &LHS, + const WithCache &RHS, + const Instruction *CxtI) const { return llvm::computeOverflowForUnsignedAdd(LHS, RHS, SQ.getWithInstruction(CxtI)); } - OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, - const Instruction *CxtI) const { + OverflowResult + computeOverflowForSignedAdd(const WithCache &LHS, + const WithCache &RHS, + const Instruction *CxtI) const { return llvm::computeOverflowForSignedAdd(LHS, RHS, SQ.getWithInstruction(CxtI)); } diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 82310444326d6..1e0281b3f1bd7 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/Analysis/WithCache.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -178,17 +179,11 @@ void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); } -static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, - unsigned Depth, const SimplifyQuery &Q); - -static KnownBits computeKnownBits(const Value *V, unsigned Depth, - const SimplifyQuery &Q); - KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - return ::computeKnownBits( + return computeKnownBits( V, 
Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); } @@ -196,13 +191,17 @@ KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - return ::computeKnownBits( + return computeKnownBits( V, DemandedElts, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); } -bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, +bool llvm::haveNoCommonBitsSet(const WithCache &LHSCache, + const WithCache &RHSCache, const SimplifyQuery &SQ) { + const Value *LHS = LHSCache.getValue(); + const Value *RHS = RHSCache.getValue(); + assert(LHS->getType() == RHS->getType() && "LHS and RHS should have the same type"); assert(LHS->getType()->isIntOrIntVectorTy() && @@ -250,12 +249,9 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, match(LHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) return true; } - IntegerType *IT = cast(LHS->getType()->getScalarType()); - KnownBits LHSKnown(IT->getBitWidth()); - KnownBits RHSKnown(IT->getBitWidth()); - ::computeKnownBits(LHS, LHSKnown, 0, SQ); - ::computeKnownBits(RHS, RHSKnown, 0, SQ); - return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown); + + return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ), + RHSCache.getKnownBits(SQ)); } bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) { @@ -1784,19 +1780,19 @@ static void computeKnownBitsFromOperator(const Operator *I, /// Determine which bits of V are known to be either zero or one and return /// them. 
-KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, - unsigned Depth, const SimplifyQuery &Q) { +KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, + unsigned Depth, const SimplifyQuery &Q) { KnownBits Known(getBitWidth(V->getType(), Q.DL)); - computeKnownBits(V, DemandedElts, Known, Depth, Q); + ::computeKnownBits(V, DemandedElts, Known, Depth, Q); return Known; } /// Determine which bits of V are known to be either zero or one and return /// them. -KnownBits computeKnownBits(const Value *V, unsigned Depth, - const SimplifyQuery &Q) { +KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth, + const SimplifyQuery &Q) { KnownBits Known(getBitWidth(V->getType(), Q.DL)); - computeKnownBits(V, Known, Depth, Q); + ::computeKnownBits(V, Known, Depth, Q); return Known; } @@ -6256,10 +6252,11 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) { /// Combine constant ranges from computeConstantRange() and computeKnownBits(). static ConstantRange -computeConstantRangeIncludingKnownBits(const Value *V, bool ForSigned, +computeConstantRangeIncludingKnownBits(const WithCache &V, + bool ForSigned, const SimplifyQuery &SQ) { - KnownBits Known = ::computeKnownBits(V, /*Depth=*/0, SQ); - ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned); + ConstantRange CR1 = + ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned); ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo); ConstantRange::PreferredRangeType RangeType = ForSigned ? 
ConstantRange::Signed : ConstantRange::Unsigned; @@ -6269,8 +6266,8 @@ computeConstantRangeIncludingKnownBits(const Value *V, bool ForSigned, OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const SimplifyQuery &SQ) { - KnownBits LHSKnown = ::computeKnownBits(LHS, /*Depth=*/0, SQ); - KnownBits RHSKnown = ::computeKnownBits(RHS, /*Depth=*/0, SQ); + KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); + KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false); ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false); return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange)); @@ -6307,17 +6304,18 @@ OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS, // product is exactly the minimum negative number. // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 // For simplicity we just check if at least one side is not negative. - KnownBits LHSKnown = ::computeKnownBits(LHS, /*Depth=*/0, SQ); - KnownBits RHSKnown = ::computeKnownBits(RHS, /*Depth=*/0, SQ); + KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); + KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) return OverflowResult::NeverOverflows; } return OverflowResult::MayOverflow; } -OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, - const Value *RHS, - const SimplifyQuery &SQ) { +OverflowResult +llvm::computeOverflowForUnsignedAdd(const WithCache &LHS, + const WithCache &RHS, + const SimplifyQuery &SQ) { ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); ConstantRange RHSRange = @@ -6325,10 +6323,10 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange)); } -static OverflowResult computeOverflowForSignedAdd(const Value *LHS, - const 
Value *RHS, - const AddOperator *Add, - const SimplifyQuery &SQ) { +static OverflowResult +computeOverflowForSignedAdd(const WithCache &LHS, + const WithCache &RHS, + const AddOperator *Add, const SimplifyQuery &SQ) { if (Add && Add->hasNoSignedWrap()) { return OverflowResult::NeverOverflows; } @@ -6944,9 +6942,10 @@ OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, Add, SQ); } -OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, - const Value *RHS, - const SimplifyQuery &SQ) { +OverflowResult +llvm::computeOverflowForSignedAdd(const WithCache &LHS, + const WithCache &RHS, + const SimplifyQuery &SQ) { return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 44f6e37cb3b44..87181650e7587 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1566,7 +1566,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { return replaceInstUsesWith(I, Constant::getNullValue(I.getType())); // A+B --> A|B iff A and B have no bits set in common. - if (haveNoCommonBitsSet(LHS, RHS, SQ.getWithInstruction(&I))) + WithCache LHSCache(LHS), RHSCache(RHS); + if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ.getWithInstruction(&I))) return BinaryOperator::CreateOr(LHS, RHS); if (Instruction *Ext = narrowMathIfNoOverflow(I)) @@ -1661,11 +1662,12 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { // willNotOverflowUnsignedAdd to reduce the number of invocations of // computeKnownBits. 
bool Changed = false; - if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) { + if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHSCache, RHSCache, I)) { Changed = true; I.setHasNoSignedWrap(true); } - if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedAdd(LHS, RHS, I)) { + if (!I.hasNoUnsignedWrap() && + willNotOverflowUnsignedAdd(LHSCache, RHSCache, I)) { Changed = true; I.setHasNoUnsignedWrap(true); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 83c127a0ef012..a53d67b2899b7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -295,13 +295,15 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *transformSExtICmp(ICmpInst *Cmp, SExtInst &Sext); - bool willNotOverflowSignedAdd(const Value *LHS, const Value *RHS, + bool willNotOverflowSignedAdd(const WithCache &LHS, + const WithCache &RHS, const Instruction &CxtI) const { return computeOverflowForSignedAdd(LHS, RHS, &CxtI) == OverflowResult::NeverOverflows; } - bool willNotOverflowUnsignedAdd(const Value *LHS, const Value *RHS, + bool willNotOverflowUnsignedAdd(const WithCache &LHS, + const WithCache &RHS, const Instruction &CxtI) const { return computeOverflowForUnsignedAdd(LHS, RHS, &CxtI) == OverflowResult::NeverOverflows; From 7cad5a9eb48e44a10121044d0342ccfbdd8df672 Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Tue, 17 Oct 2023 14:23:15 +0000 Subject: [PATCH 356/720] [Clang][SVE2.1] Add svpext builtins As described in: https://github.com/ARM-software/acle/pull/257 Reviewed By: hassnaa-arm Differential Revision: https://reviews.llvm.org/D151081 --- clang/include/clang/Basic/arm_sve.td | 4 +- clang/include/clang/Basic/arm_sve_sme_incl.td | 3 +- clang/lib/CodeGen/CGBuiltin.cpp | 37 ++++- clang/lib/CodeGen/CodeGenFunction.h | 5 + .../acle_sve2p1_pext.c | 152 ++++++++++++++++++ .../acle_sve2p1_imm.cpp | 23 +++ 
clang/utils/TableGen/SveEmitter.cpp | 74 ++++++--- 7 files changed, 273 insertions(+), 25 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 07dc8cdece990..f54e65ef7119c 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1862,11 +1862,13 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sv let TargetGuard = "sve2p1" in { def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>; def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; + +def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>; +def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>; } let TargetGuard = "sve2p1" in { def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [], []>; def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>; def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>; - } diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index da15f1fb31847..c3a6dc4e4d44a 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -61,7 +61,8 @@ // ------------------- // prototype: return (arg, arg, ...) // -// 2,3,4: array of default vectors +// 2,3,4: array of vectors +// .: indicator for multi-vector modifier that will follow (e.g. 
2.x) // v: void // x: vector of signed integers // u: vector of unsigned integers diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f1c199e165fca..116af1435fe6e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9853,6 +9853,41 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags, return Call; } +Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { + // Multi-vector results should be broken up into a single (wide) result + // vector. + auto *StructTy = dyn_cast(Call->getType()); + if (!StructTy) + return Call; + + auto *VTy = dyn_cast(StructTy->getTypeAtIndex(0U)); + if (!VTy) + return Call; + unsigned N = StructTy->getNumElements(); + + // We may need to emit a cast to a svbool_t + bool IsPredTy = VTy->getElementType()->isIntegerTy(1); + unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements(); + + ScalableVectorType *WideVTy = + ScalableVectorType::get(VTy->getElementType(), MinElts * N); + Value *Ret = llvm::PoisonValue::get(WideVTy); + for (unsigned I = 0; I < N; ++I) { + Value *SRet = Builder.CreateExtractValue(Call, I); + assert(SRet->getType() == VTy && "Unexpected type for result value"); + Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); + + if (IsPredTy) + SRet = EmitSVEPredicateCast( + SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16)); + + Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); + } + Call = Ret; + + return Call; +} + Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { // Find out if any arguments are required to be integer constant expressions. 
@@ -9966,7 +10001,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, if (PredTy->getScalarType()->isIntegerTy(1)) Call = EmitSVEPredicateCast(Call, cast(Ty)); - return Call; + return FormSVEBuiltinResult(Call); } switch (BuiltinID) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 6bc6d244bee20..e82115e2d706c 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4292,6 +4292,11 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID); + /// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider + /// vector. It extracts the scalable vector from the struct and inserts into + /// the wider vector. This avoids the error when allocating space in llvm + /// for struct of scalable vectors if a function returns struct. + llvm::Value *FormSVEBuiltinResult(llvm::Value *Call); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags, diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c new file mode 100644 index 0000000000000..fe15d5a9db81f --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c @@ -0,0 +1,152 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// CHECK-LABEL: @test_svpext_lane_c8_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svpext_lane_c8_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbool_t test_svpext_lane_c8_0(svcount_t c) { + return svpext_lane_c8(c, 0); +} + +// CHECK-LABEL: @test_svpext_lane_c8_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svpext_lane_c8_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbool_t test_svpext_lane_c8_3(svcount_t c) { + return svpext_lane_c8(c, 3); +} + +// CHECK-LABEL: @test_svpext_lane_c16_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c16_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c16_0(svcount_t c) { + return svpext_lane_c16(c, 0); +} + +// CHECK-LABEL: @test_svpext_lane_c16_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// 
CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c16_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c16_3(svcount_t c) { + return svpext_lane_c16(c, 3); +} + +// CHECK-LABEL: @test_svpext_lane_c32_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c32_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c32_0(svcount_t c) { + return svpext_lane_c32(c, 0); +} + +// CHECK-LABEL: @test_svpext_lane_c32_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c32_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c32_3(svcount_t c) { + return svpext_lane_c32(c, 3); +} + +// CHECK-LABEL: @test_svpext_lane_c64_0( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c64_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c64_0(svcount_t c) { + return svpext_lane_c64(c, 0); +} + +// CHECK-LABEL: @test_svpext_lane_c64_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c64_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c64_3(svcount_t c) { + return svpext_lane_c64(c, 3); +} + +// CHECK-LABEL: @test_svpext_lane_c8_x2_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: 
@_Z24test_svpext_lane_c8_x2_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) { + return svpext_lane_c8_x2(c, 0); +} diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp index 781757a2b9c23..39ed13614f5a5 100644 --- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp +++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp @@ -3,6 +3,29 @@ // REQUIRES: aarch64-registered-target #include +void test_svpext_lane_imm_0_3(svcount_t c) { + svpext_lane_c8(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svpext_lane_c16(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svpext_lane_c32(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svpext_lane_c64(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + + svpext_lane_c8(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svpext_lane_c16(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svpext_lane_c32(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svpext_lane_c64(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +} + +void 
test_svpext_lane_x2_imm_0_1(svcount_t c) { + svpext_lane_c8_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svpext_lane_c16_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svpext_lane_c32_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svpext_lane_c64_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + + svpext_lane_c8_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svpext_lane_c16_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svpext_lane_c32_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svpext_lane_c64_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +} void test_cntp(svcount_t c) { svcntp_c8(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index f725c39540050..7e9afc538c2b5 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -73,12 +73,12 @@ class SVEType { public: SVEType() : SVEType(TypeSpec(), 'v') {} - SVEType(TypeSpec TS, char CharMod) + SVEType(TypeSpec TS, char CharMod, unsigned NumVectors = 1) : TS(TS), Float(false), Signed(true), Immediate(false), Void(false), Constant(false), Pointer(false), BFloat(false), DefaultType(false), IsScalable(true), Predicate(false), PredicatePattern(false), PrefetchOp(false), Svcount(false), Bitwidth(128), ElementBitwidth(~0U), - NumVectors(1) { + NumVectors(NumVectors) { if (!TS.empty()) applyTypespec(); applyModifier(CharMod); @@ -194,7 +194,9 @@ class Intrinsic { SVEType getReturnType() const { return Types[0]; } ArrayRef getTypes() const { return Types; } SVEType getParamType(unsigned I) const { 
return Types[I + 1]; } - unsigned getNumParams() const { return Proto.size() - 1; } + unsigned getNumParams() const { + return Proto.size() - (2 * std::count(Proto.begin(), Proto.end(), '.')) - 1; + } uint64_t getFlags() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag;} @@ -228,11 +230,19 @@ class Intrinsic { /// Return the parameter index of the splat operand. unsigned getSplatIdx() const { - // These prototype modifiers are described in arm_sve.td. - auto Idx = Proto.find_first_of("ajfrKLR@"); - assert(Idx != std::string::npos && Idx > 0 && - "Prototype has no splat operand"); - return Idx - 1; + unsigned I = 1, Param = 0; + for (; I < Proto.size(); ++I, ++Param) { + if (Proto[I] == 'a' || Proto[I] == 'j' || Proto[I] == 'f' || + Proto[I] == 'r' || Proto[I] == 'K' || Proto[I] == 'L' || + Proto[I] == 'R' || Proto[I] == '@') + break; + + // Multivector modifier can be skipped + if (Proto[I] == '.') + I += 2; + } + assert(I != Proto.size() && "Prototype has no splat operand"); + return Param; } /// Emits the intrinsic declaration to the ostream. @@ -540,15 +550,6 @@ void SVEType::applyTypespec() { void SVEType::applyModifier(char Mod) { switch (Mod) { - case '2': - NumVectors = 2; - break; - case '3': - NumVectors = 3; - break; - case '4': - NumVectors = 4; - break; case 'v': Void = true; break; @@ -859,11 +860,36 @@ void SVEType::applyModifier(char Mod) { Float = false; BFloat = false; break; + case '.': + llvm_unreachable(". is never a type in itself"); + break; default: llvm_unreachable("Unhandled character!"); } } +/// Returns the modifier and number of vectors for the given operand \p Op. 
+std::pair getProtoModifier(StringRef Proto, unsigned Op) { + for (unsigned P = 0; !Proto.empty(); ++P) { + unsigned NumVectors = 1; + unsigned CharsToSkip = 1; + char Mod = Proto[0]; + if (Mod == '2' || Mod == '3' || Mod == '4') { + NumVectors = Mod - '0'; + Mod = 'd'; + if (Proto.size() > 1 && Proto[1] == '.') { + Mod = Proto[2]; + CharsToSkip = 3; + } + } + + if (P == Op) + return {Mod, NumVectors}; + + Proto = Proto.drop_front(CharsToSkip); + } + llvm_unreachable("Unexpected Op"); +} //===----------------------------------------------------------------------===// // Intrinsic implementation @@ -879,8 +905,11 @@ Intrinsic::Intrinsic(StringRef Name, StringRef Proto, uint64_t MergeTy, MergeSuffix(MergeSuffix.str()), BaseType(BT, 'd'), Flags(Flags), ImmChecks(Checks.begin(), Checks.end()) { // Types[0] is the return value. - for (unsigned I = 0; I < Proto.size(); ++I) { - SVEType T(BaseTypeSpec, Proto[I]); + for (unsigned I = 0; I < (getNumParams() + 1); ++I) { + char Mod; + unsigned NumVectors; + std::tie(Mod, NumVectors) = getProtoModifier(Proto, I); + SVEType T(BaseTypeSpec, Mod, NumVectors); Types.push_back(T); // Add range checks for immediates @@ -1124,10 +1153,11 @@ void SVEEmitter::createIntrinsic( assert(Arg >= 0 && Kind >= 0 && "Arg and Kind must be nonnegative"); unsigned ElementSizeInBits = 0; + char Mod; + unsigned NumVectors; + std::tie(Mod, NumVectors) = getProtoModifier(Proto, EltSizeArg + 1); if (EltSizeArg >= 0) - ElementSizeInBits = - SVEType(TS, Proto[EltSizeArg + /* offset by return arg */ 1]) - .getElementSizeInBits(); + ElementSizeInBits = SVEType(TS, Mod, NumVectors).getElementSizeInBits(); ImmChecks.push_back(ImmCheck(Arg, Kind, ElementSizeInBits)); } From 08d6b8745430e133cc9d257cded623229e58fddd Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Tue, 17 Oct 2023 09:20:46 -0700 Subject: [PATCH 357/720] [flang] Round derived type byte sizes up to alignment multiple (#67571) When calculating sizes 
and offsets of types and components, be sure to round the size of a derived type up to a multiple of its alignment. --- flang/lib/Semantics/compute-offsets.cpp | 2 ++ flang/test/Semantics/offsets02.f90 | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/flang/lib/Semantics/compute-offsets.cpp b/flang/lib/Semantics/compute-offsets.cpp index 139a8eb7c8c37..375d2e3f7df37 100644 --- a/flang/lib/Semantics/compute-offsets.cpp +++ b/flang/lib/Semantics/compute-offsets.cpp @@ -116,6 +116,8 @@ void ComputeOffsetsHelper::Compute(Scope &scope) { DoSymbol(*symbol); } } + // Ensure that the size is a multiple of the alignment + offset_ = Align(offset_, alignment_); scope.set_size(offset_); scope.SetAlignment(alignment_); // Assign offsets in COMMON blocks, unless this scope is a BLOCK construct, diff --git a/flang/test/Semantics/offsets02.f90 b/flang/test/Semantics/offsets02.f90 index 387bbac5ff6d4..11e086cf68bee 100644 --- a/flang/test/Semantics/offsets02.f90 +++ b/flang/test/Semantics/offsets02.f90 @@ -8,11 +8,17 @@ subroutine s1 real(8) :: a real(4) :: b end type - !CHECK: x1 size=12 offset=0: - !CHECK: y1 size=12 offset=16: + type t2 + type(t1) c + real(4) d + end type + !CHECK: x1 size=16 offset=0: + !CHECK: y1 size=16 offset=16: type(t1) :: x1, y1 !CHECK: z1 size=160 offset=32: type(t1) :: z1(10) + !CHECK: z2 size=24 offset=192 + type(t2) z2 end ! Like t1 but t2 does not need to be aligned on 64-bit boundary From 760e7d00d142ba85fcf48c00e0acc14a355da7c3 Mon Sep 17 00:00:00 2001 From: Guozhi Wei Date: Tue, 17 Oct 2023 16:22:42 +0000 Subject: [PATCH 358/720] [X86, Peephole] Enable FoldImmediate for X86 Enable FoldImmediate for X86 by implementing X86InstrInfo::FoldImmediate. Also enhanced peephole by deleting identical instructions after FoldImmediate. 
Differential Revision: https://reviews.llvm.org/D151848 --- llvm/lib/CodeGen/PeepholeOptimizer.cpp | 60 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 340 +- llvm/lib/Target/X86/X86InstrInfo.h | 9 + .../test/CodeGen/AMDGPU/peephole-fold-imm.mir | 1 - llvm/test/CodeGen/X86/GlobalISel/phi.ll | 18 +- .../X86/div-rem-pair-recomposition-signed.ll | 357 +- .../div-rem-pair-recomposition-unsigned.ll | 99 +- llvm/test/CodeGen/X86/fast-isel-freeze.ll | 4 +- llvm/test/CodeGen/X86/foldimmediate-size.ll | 57 + llvm/test/CodeGen/X86/foldimmediate.mir | 143 + llvm/test/CodeGen/X86/pcsections-atomics.ll | 3609 +++++++++-------- llvm/test/CodeGen/X86/physreg-pairs.ll | 2 +- llvm/test/CodeGen/X86/popcnt.ll | 222 +- llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 2 +- llvm/test/CodeGen/X86/remat-phys-dead.ll | 2 +- llvm/test/CodeGen/X86/select_const_i128.ll | 3 +- llvm/test/CodeGen/X86/shrink_vmul.ll | 48 +- ...speculative-load-hardening-call-and-ret.ll | 75 +- llvm/test/CodeGen/X86/swifterror.ll | 28 +- .../vector-shuffle-combining-avx512bwvl.ll | 5 +- 20 files changed, 2874 insertions(+), 2210 deletions(-) create mode 100644 llvm/test/CodeGen/X86/foldimmediate-size.ll create mode 100644 llvm/test/CodeGen/X86/foldimmediate.mir diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index a08cc78f11b1b..f413ca5b04f48 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -202,7 +202,8 @@ namespace { bool isMoveImmediate(MachineInstr &MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); bool foldImmediate(MachineInstr &MI, SmallSet &ImmDefRegs, - DenseMap &ImmDefMIs); + DenseMap &ImmDefMIs, + bool &Deleted); /// Finds recurrence cycles, but only ones that formulated around /// a def operand and a use operand that are tied. If there is a use @@ -217,8 +218,11 @@ namespace { /// set \p CopyMIs. 
If this virtual register was previously seen as a /// copy, replace the uses of this copy with the previously seen copy's /// destination register. + /// \p LocalMIs contains all previous seen instructions. An optimized away + /// instruction should be deleted from LocalMIs. bool foldRedundantCopy(MachineInstr &MI, - DenseMap &CopyMIs); + DenseMap &CopyMIs, + SmallPtrSetImpl &LocalMIs); /// Is the register \p Reg a non-allocatable physical register? bool isNAPhysCopy(Register Reg); @@ -1351,18 +1355,19 @@ bool PeepholeOptimizer::isMoveImmediate( MachineInstr &MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs) { const MCInstrDesc &MCID = MI.getDesc(); - if (!MI.isMoveImmediate()) - return false; - if (MCID.getNumDefs() != 1) + if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg()) return false; Register Reg = MI.getOperand(0).getReg(); - if (Reg.isVirtual()) { - ImmDefMIs.insert(std::make_pair(Reg, &MI)); - ImmDefRegs.insert(Reg); - return true; - } + if (!Reg.isVirtual()) + return false; - return false; + int64_t ImmVal; + if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal)) + return false; + + ImmDefMIs.insert(std::make_pair(Reg, &MI)); + ImmDefRegs.insert(Reg); + return true; } /// Try folding register operands that are defined by move immediate @@ -1370,7 +1375,8 @@ bool PeepholeOptimizer::isMoveImmediate( /// and only if the def and use are in the same BB. bool PeepholeOptimizer::foldImmediate( MachineInstr &MI, SmallSet &ImmDefRegs, - DenseMap &ImmDefMIs) { + DenseMap &ImmDefMIs, bool &Deleted) { + Deleted = false; for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isDef()) @@ -1384,6 +1390,19 @@ bool PeepholeOptimizer::foldImmediate( assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) { ++NumImmFold; + // FoldImmediate can delete ImmDefMI if MI was its only user. 
If ImmDefMI + // is not deleted, and we happened to get a same MI, we can delete MI and + // replace its users. + if (MRI->getVRegDef(Reg) && + MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) { + Register DstReg = MI.getOperand(0).getReg(); + if (DstReg.isVirtual() && + MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) { + MRI->replaceRegWith(DstReg, Reg); + MI.eraseFromParent(); + Deleted = true; + } + } return true; } } @@ -1405,7 +1424,8 @@ bool PeepholeOptimizer::foldImmediate( // // Should replace %2 uses with %1:sub1 bool PeepholeOptimizer::foldRedundantCopy( - MachineInstr &MI, DenseMap &CopyMIs) { + MachineInstr &MI, DenseMap &CopyMIs, + SmallPtrSetImpl &LocalMIs) { assert(MI.isCopy() && "expected a COPY machine instruction"); Register SrcReg = MI.getOperand(1).getReg(); @@ -1425,6 +1445,8 @@ bool PeepholeOptimizer::foldRedundantCopy( } MachineInstr *PrevCopy = CopyMIs.find(SrcPair)->second; + if (!LocalMIs.count(PrevCopy)) + return false; assert(SrcSubReg == PrevCopy->getOperand(1).getSubReg() && "Unexpected mismatching subreg!"); @@ -1732,7 +1754,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - if (MI->isCopy() && (foldRedundantCopy(*MI, CopySrcMIs) || + if (MI->isCopy() && (foldRedundantCopy(*MI, CopySrcMIs, LocalMIs) || foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) { LocalMIs.erase(MI); LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n"); @@ -1750,8 +1772,14 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // next iteration sees the new instructions. 
MII = MI; ++MII; - if (SeenMoveImm) - Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs); + if (SeenMoveImm) { + bool Deleted; + Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted); + if (Deleted) { + LocalMIs.erase(MI); + continue; + } + } } // Check whether MI is a load candidate for folding into a later diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 4320a0e94b7a7..f0c46419ab351 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3867,12 +3867,42 @@ bool X86InstrInfo::verifyInstruction(const MachineInstr &MI, bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const { - if (MI.getOpcode() != X86::MOV32ri && MI.getOpcode() != X86::MOV64ri) + Register MovReg = Reg; + const MachineInstr *MovMI = &MI; + + // Follow use-def for SUBREG_TO_REG to find the real move immediate + // instruction. It is quite common for x86-64. + if (MI.isSubregToReg()) { + // We use following pattern to setup 64b immediate. + // %8:gr32 = MOV32r0 implicit-def dead $eflags + // %6:gr64 = SUBREG_TO_REG 0, killed %8:gr32, %subreg.sub_32bit + if (!MI.getOperand(1).isImm()) + return false; + unsigned FillBits = MI.getOperand(1).getImm(); + unsigned SubIdx = MI.getOperand(3).getImm(); + MovReg = MI.getOperand(2).getReg(); + if (SubIdx != X86::sub_32bit || FillBits != 0) + return false; + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + MovMI = MRI.getUniqueVRegDef(MovReg); + if (!MovMI) + return false; + } + + if (MovMI->getOpcode() == X86::MOV32r0 && + MovMI->getOperand(0).getReg() == MovReg) { + ImmVal = 0; + return true; + } + + if (MovMI->getOpcode() != X86::MOV32ri && + MovMI->getOpcode() != X86::MOV64ri && + MovMI->getOpcode() != X86::MOV32ri64 && MovMI->getOpcode() != X86::MOV8ri) return false; // Mov Src can be a global address. 
- if (!MI.getOperand(1).isImm() || MI.getOperand(0).getReg() != Reg) + if (!MovMI->getOperand(1).isImm() || MovMI->getOperand(0).getReg() != MovReg) return false; - ImmVal = MI.getOperand(1).getImm(); + ImmVal = MovMI->getOperand(1).getImm(); return true; } @@ -4769,6 +4799,310 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI, return nullptr; } +/// Convert an ALUrr opcode to corresponding ALUri opcode. Such as +/// ADD32rr ==> ADD32ri +/// ShiftRotate will be set to true if the Opcode is shift or rotate. +/// If the ALUri can be further changed to COPY when the immediate is 0, set +/// CanConvert2Copy to true. +static unsigned ConvertALUrr2ALUri(unsigned Opcode, bool &CanConvert2Copy, + bool &ShiftRotate) { + CanConvert2Copy = false; + ShiftRotate = false; + unsigned NewOpcode = 0; + switch (Opcode) { + case X86::ADD64rr: + NewOpcode = X86::ADD64ri32; + CanConvert2Copy = true; + break; + case X86::ADC64rr: + NewOpcode = X86::ADC64ri32; + break; + case X86::SUB64rr: + NewOpcode = X86::SUB64ri32; + CanConvert2Copy = true; + break; + case X86::SBB64rr: + NewOpcode = X86::SBB64ri32; + break; + case X86::AND64rr: + NewOpcode = X86::AND64ri32; + break; + case X86::OR64rr: + NewOpcode = X86::OR64ri32; + CanConvert2Copy = true; + break; + case X86::XOR64rr: + NewOpcode = X86::XOR64ri32; + CanConvert2Copy = true; + break; + case X86::TEST64rr: + NewOpcode = X86::TEST64ri32; + break; + case X86::CMP64rr: + NewOpcode = X86::CMP64ri32; + break; + case X86::SHR64rCL: + NewOpcode = X86::SHR64ri; + ShiftRotate = true; + break; + case X86::SHL64rCL: + NewOpcode = X86::SHL64ri; + ShiftRotate = true; + break; + case X86::SAR64rCL: + NewOpcode = X86::SAR64ri; + ShiftRotate = true; + break; + case X86::ROL64rCL: + NewOpcode = X86::ROL64ri; + ShiftRotate = true; + break; + case X86::ROR64rCL: + NewOpcode = X86::ROR64ri; + ShiftRotate = true; + break; + case X86::RCL64rCL: + NewOpcode = X86::RCL64ri; + ShiftRotate = true; + break; + case X86::RCR64rCL: + NewOpcode = 
X86::RCR64ri; + ShiftRotate = true; + break; + case X86::ADD32rr: + NewOpcode = X86::ADD32ri; + CanConvert2Copy = true; + break; + case X86::ADC32rr: + NewOpcode = X86::ADC32ri; + break; + case X86::SUB32rr: + NewOpcode = X86::SUB32ri; + CanConvert2Copy = true; + break; + case X86::SBB32rr: + NewOpcode = X86::SBB32ri; + break; + case X86::AND32rr: + NewOpcode = X86::AND32ri; + break; + case X86::OR32rr: + NewOpcode = X86::OR32ri; + CanConvert2Copy = true; + break; + case X86::XOR32rr: + NewOpcode = X86::XOR32ri; + CanConvert2Copy = true; + break; + case X86::TEST32rr: + NewOpcode = X86::TEST32ri; + break; + case X86::CMP32rr: + NewOpcode = X86::CMP32ri; + break; + case X86::SHR32rCL: + NewOpcode = X86::SHR32ri; + ShiftRotate = true; + break; + case X86::SHL32rCL: + NewOpcode = X86::SHL32ri; + ShiftRotate = true; + break; + case X86::SAR32rCL: + NewOpcode = X86::SAR32ri; + ShiftRotate = true; + break; + case X86::ROL32rCL: + NewOpcode = X86::ROL32ri; + ShiftRotate = true; + break; + case X86::ROR32rCL: + NewOpcode = X86::ROR32ri; + ShiftRotate = true; + break; + case X86::RCL32rCL: + NewOpcode = X86::RCL32ri; + ShiftRotate = true; + break; + case X86::RCR32rCL: + NewOpcode = X86::RCR32ri; + ShiftRotate = true; + break; + } + return NewOpcode; +} + +/// Real implementation of FoldImmediate. +/// Reg is assigned ImmVal in DefMI, and is used in UseMI. +/// If MakeChange is true, this function tries to replace Reg by ImmVal in +/// UseMI. If MakeChange is false, just check if folding is possible. +/// Return true if folding is successful or possible. +bool X86InstrInfo::FoldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI, + Register Reg, int64_t ImmVal, + MachineRegisterInfo *MRI, + bool MakeChange) const { + bool Modified = false; + bool ShiftRotate = false; + // When ImmVal is 0, some instructions can be changed to COPY. + bool CanChangeToCopy = false; + unsigned Opc = UseMI.getOpcode(); + + // 64 bit operations accept sign extended 32 bit immediates. 
+ // 32 bit operations accept all 32 bit immediates, so we don't need to check + // them. + const TargetRegisterClass *RC = nullptr; + if (Reg.isVirtual()) + RC = MRI->getRegClass(Reg); + if ((Reg.isPhysical() && X86::GR64RegClass.contains(Reg)) || + (Reg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC))) { + if (!isInt<32>(ImmVal)) + return false; + } + + if (UseMI.findRegisterUseOperand(Reg)->getSubReg()) + return false; + // Immediate has larger code size than register. So avoid folding the + // immediate if it has more than 1 use and we are optimizing for size. + if (UseMI.getMF()->getFunction().hasOptSize() && Reg.isVirtual() && + !MRI->hasOneNonDBGUse(Reg)) + return false; + + unsigned NewOpc; + if (Opc == TargetOpcode::COPY) { + Register ToReg = UseMI.getOperand(0).getReg(); + const TargetRegisterClass *RC = nullptr; + if (ToReg.isVirtual()) + RC = MRI->getRegClass(ToReg); + bool GR32Reg = (ToReg.isVirtual() && X86::GR32RegClass.hasSubClassEq(RC)) || + (ToReg.isPhysical() && X86::GR32RegClass.contains(ToReg)); + bool GR64Reg = (ToReg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC)) || + (ToReg.isPhysical() && X86::GR64RegClass.contains(ToReg)); + bool GR8Reg = (ToReg.isVirtual() && X86::GR8RegClass.hasSubClassEq(RC)) || + (ToReg.isPhysical() && X86::GR8RegClass.contains(ToReg)); + + if (ImmVal == 0) { + // We have MOV32r0 only. + if (!GR32Reg) + return false; + } + + if (GR64Reg) { + if (isUInt<32>(ImmVal)) + NewOpc = X86::MOV32ri64; + else + NewOpc = X86::MOV64ri; + } else if (GR32Reg) { + NewOpc = X86::MOV32ri; + if (ImmVal == 0) { + // MOV32r0 clobbers EFLAGS. + const TargetRegisterInfo *TRI = &getRegisterInfo(); + if (UseMI.getParent()->computeRegisterLiveness(TRI, X86::EFLAGS, UseMI) + != MachineBasicBlock::LQR_Dead) + return false; + + // MOV32r0 is different than other cases because it doesn't encode the + // immediate in the instruction. So we directly modify it here. 
+ if (!MakeChange) + return true; + UseMI.setDesc(get(X86::MOV32r0)); + UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg)); + UseMI.addOperand(MachineOperand::CreateReg(X86::EFLAGS, /*isDef=*/ true, + /*isImp=*/ true, + /*isKill=*/ false, + /*isDead=*/ true)); + Modified = true; + } + } else if (GR8Reg) + NewOpc = X86::MOV8ri; + else + return false; + } else + NewOpc = ConvertALUrr2ALUri(Opc, CanChangeToCopy, ShiftRotate); + + if (!NewOpc) + return false; + + // For SUB instructions the immediate can only be the second source operand. + if ((NewOpc == X86::SUB64ri32 || NewOpc == X86::SUB32ri || + NewOpc == X86::SBB64ri32 || NewOpc == X86::SBB32ri) && + UseMI.findRegisterUseOperandIdx(Reg) != 2) + return false; + // For CMP instructions the immediate can only be at index 1. + if ((NewOpc == X86::CMP64ri32 || NewOpc == X86::CMP32ri) && + UseMI.findRegisterUseOperandIdx(Reg) != 1) + return false; + + if (ShiftRotate) { + unsigned RegIdx = UseMI.findRegisterUseOperandIdx(Reg); + if (RegIdx < 2) + return false; + if (!isInt<8>(ImmVal)) + return false; + assert(Reg == X86::CL); + + if (!MakeChange) + return true; + UseMI.setDesc(get(NewOpc)); + UseMI.removeOperand(RegIdx); + UseMI.addOperand(MachineOperand::CreateImm(ImmVal)); + // Reg is physical register $cl, so we don't know if DefMI is dead through + // MRI. Let the caller handle it, or pass dead-mi-elimination can delete + // the dead physical register define instruction. + return true; + } + + if (!MakeChange) + return true; + + if (!Modified) { + // Modify the instruction. 
+ if (ImmVal == 0 && CanChangeToCopy && + UseMI.registerDefIsDead(X86::EFLAGS)) { + // %100 = add %101, 0 + // ==> + // %100 = COPY %101 + UseMI.setDesc(get(TargetOpcode::COPY)); + UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg)); + UseMI.removeOperand(UseMI.findRegisterDefOperandIdx(X86::EFLAGS)); + UseMI.untieRegOperand(0); + UseMI.clearFlag(MachineInstr::MIFlag::NoSWrap); + UseMI.clearFlag(MachineInstr::MIFlag::NoUWrap); + } else { + unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex; + unsigned ImmOpNum = 2; + if (!UseMI.getOperand(0).isDef()) { + Op1 = 0; // TEST, CMP + ImmOpNum = 1; + } + if (Opc == TargetOpcode::COPY) + ImmOpNum = 1; + if (findCommutedOpIndices(UseMI, Op1, Op2) && + UseMI.getOperand(Op1).getReg() == Reg) + commuteInstruction(UseMI); + + assert(UseMI.getOperand(ImmOpNum).getReg() == Reg); + UseMI.setDesc(get(NewOpc)); + UseMI.getOperand(ImmOpNum).ChangeToImmediate(ImmVal); + } + } + + if (Reg.isVirtual() && MRI->use_nodbg_empty(Reg)) + DefMI->eraseFromBundle(); + + return true; +} + +/// FoldImmediate - 'Reg' is known to be defined by a move immediate +/// instruction, try to fold the immediate into the use instruction. +bool X86InstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + Register Reg, MachineRegisterInfo *MRI) const { + int64_t ImmVal; + if (!getConstValDefinedInReg(DefMI, Reg, ImmVal)) + return false; + + return FoldImmediateImpl(UseMI, &DefMI, Reg, ImmVal, MRI, true); +} + /// Expand a single-def pseudo instruction to a two-addr /// instruction with two undef reads of the register being defined. 
/// This is used for mapping: diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 8119302f73e8b..4d261a803421c 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -550,6 +550,15 @@ class X86InstrInfo final : public X86GenInstrInfo { Register &FoldAsLoadDefReg, MachineInstr *&DefMI) const override; + bool FoldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI, Register Reg, + int64_t ImmVal, MachineRegisterInfo *MRI, + bool MakeChange) const; + + /// Reg is known to be defined by a move immediate instruction, try to fold + /// the immediate into the use instruction. + bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, + MachineRegisterInfo *MRI) const override; + std::pair decomposeMachineOperandsTargetFlags(unsigned TF) const override; diff --git a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir index 099aaa449b1c9..4a77c03a8facd 100644 --- a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir @@ -8,7 +8,6 @@ body: | ; GCN-LABEL: name: fold_simm_virtual ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: SI_RETURN_TO_EPILOG %0:sreg_32 = S_MOV_B32 0 %1:sreg_32 = COPY killed %0 diff --git a/llvm/test/CodeGen/X86/GlobalISel/phi.ll b/llvm/test/CodeGen/X86/GlobalISel/phi.ll index d2ce98d0fb41a..b29540f002598 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/phi.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/phi.ll @@ -4,8 +4,7 @@ define i8 @test_i8(i32 %a, i8 %f, i8 %t) { ; ALL-LABEL: test_i8: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %ecx, %ecx -; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: cmpl $0, %edi ; ALL-NEXT: setg %cl ; ALL-NEXT: testb $1, %cl ; ALL-NEXT: je .LBB0_2 @@ -35,8 +34,7 @@ cond.end: ; preds = %cond.false, %cond.t define i16 @test_i16(i32 %a, i16 %f, i16 %t) { ; ALL-LABEL: test_i16: ; ALL: # 
%bb.0: # %entry -; ALL-NEXT: xorl %ecx, %ecx -; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: cmpl $0, %edi ; ALL-NEXT: setg %cl ; ALL-NEXT: testb $1, %cl ; ALL-NEXT: je .LBB1_2 @@ -67,8 +65,7 @@ define i32 @test_i32(i32 %a, i32 %f, i32 %t) { ; ALL-LABEL: test_i32: ; ALL: # %bb.0: # %entry ; ALL-NEXT: movl %esi, %eax -; ALL-NEXT: xorl %ecx, %ecx -; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: cmpl $0, %edi ; ALL-NEXT: setg %cl ; ALL-NEXT: testb $1, %cl ; ALL-NEXT: je .LBB2_1 @@ -96,8 +93,7 @@ define i64 @test_i64(i32 %a, i64 %f, i64 %t) { ; ALL-LABEL: test_i64: ; ALL: # %bb.0: # %entry ; ALL-NEXT: movq %rsi, %rax -; ALL-NEXT: xorl %ecx, %ecx -; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: cmpl $0, %edi ; ALL-NEXT: setg %cl ; ALL-NEXT: testb $1, %cl ; ALL-NEXT: je .LBB3_1 @@ -124,8 +120,7 @@ cond.end: ; preds = %cond.false, %cond.t define float @test_float(i32 %a, float %f, float %t) { ; ALL-LABEL: test_float: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi +; ALL-NEXT: cmpl $0, %edi ; ALL-NEXT: setg %al ; ALL-NEXT: testb $1, %al ; ALL-NEXT: je .LBB4_1 @@ -152,8 +147,7 @@ cond.end: ; preds = %cond.false, %cond.t define double @test_double(i32 %a, double %f, double %t) { ; ALL-LABEL: test_double: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi +; ALL-NEXT: cmpl $0, %edi ; ALL-NEXT: setg %al ; ALL-NEXT: testb $1, %al ; ALL-NEXT: je .LBB5_1 diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll index d26f4b7044cf3..e12ca56023a7f 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll @@ -178,15 +178,15 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: subl $152, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 
; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: sarl $31, %edi ; X86-NEXT: movl %eax, %esi ; X86-NEXT: xorl %ecx, %esi -; X86-NEXT: movl %esi, %edi +; X86-NEXT: movl %esi, %ebp ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %ebx @@ -195,66 +195,67 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %eax, %esi ; X86-NEXT: xorl {{[0-9]+}}(%esp), %esi ; X86-NEXT: subl %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, (%esp) # 4-byte Spill ; X86-NEXT: sbbl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %eax, %ebx -; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X86-NEXT: sbbl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %edi -; X86-NEXT: xorl %ebp, %edi -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %eax, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, %esi +; X86-NEXT: xorl %edx, %esi +; X86-NEXT: movl %edi, %edx +; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edi, %ebx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: xorl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %edi, %ebp ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: subl %edx, %ebp -; X86-NEXT: sbbl %edx, %esi -; X86-NEXT: sbbl %edx, %ebx -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: xorl %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, %eax -; X86-NEXT: orl %edi, %eax +; X86-NEXT: subl %edi, %ebp +; X86-NEXT: sbbl %edi, %ebx +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: sbbl %edi, %esi +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: orl %esi, %eax ; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: movl %edx, %edi ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: sete %cl ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: orl (%esp), %edx # 4-byte Folded Reload +; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: orl %eax, %edx ; X86-NEXT: sete %al ; X86-NEXT: orb %cl, %al ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: bsrl %edi, %edx +; X86-NEXT: bsrl %esi, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: bsrl %ebx, %ecx +; X86-NEXT: bsrl %edi, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx -; X86-NEXT: testl %edi, %edi +; X86-NEXT: testl %esi, %esi ; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: bsrl %esi, %edx +; X86-NEXT: bsrl %ebx, %edx ; X86-NEXT: xorl $31, %edx ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: bsrl %ebp, %ebp ; X86-NEXT: xorl $31, %ebp ; X86-NEXT: addl $32, %ebp -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testl %esi, %esi +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testl %ebx, %ebx ; X86-NEXT: cmovnel %edx, %ebp ; X86-NEXT: addl $64, %ebp -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %edi, %ebx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %esi, %edi ; X86-NEXT: cmovnel %ecx, %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: bsrl %edi, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: bsrl %eax, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx @@ -263,7 +264,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: bsrl %ebx, %esi ; X86-NEXT: xorl $31, %esi -; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: bsrl (%esp), %edx # 4-byte Folded Reload ; X86-NEXT: xorl $31, %edx ; X86-NEXT: addl $32, %edx ; X86-NEXT: testl %ebx, %ebx @@ -272,53 +273,50 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %eax, %esi ; X86-NEXT: orl %edi, %esi ; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: xorl %esi, %esi ; X86-NEXT: subl %edx, %ebp -; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: movl $0, %ebx +; X86-NEXT: sbbl %ebx, %ebx ; X86-NEXT: movl $0, %edx ; X86-NEXT: sbbl %edx, %edx -; X86-NEXT: movl $0, %esi -; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: movl $0, %eax +; X86-NEXT: sbbl %eax, %eax ; X86-NEXT: movl $127, %ecx ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: cmpl %ebp, %ecx -; X86-NEXT: movl %esi, %ebp ; X86-NEXT: movl $0, %ecx -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: sbbl %ebx, %ecx ; X86-NEXT: movl $0, %ecx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %eax, %ecx ; X86-NEXT: setb %cl ; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X86-NEXT: cmovnel %ebx, %edi -; X86-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NEXT: cmovnel %ebx, %edx +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: cmovnel %esi, %edx ; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovnel %ebx, %eax -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: jne .LBB4_1 -; X86-NEXT: # %bb.8: # %_udiv-special-cases -; X86-NEXT: movl %ebp, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: xorl $127, %ebp -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebx, %ecx -; X86-NEXT: orl %ebp, %ecx +; X86-NEXT: cmovnel %esi, %eax +; X86-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: jne .LBB4_8 +; X86-NEXT: # %bb.1: # %_udiv-special-cases +; X86-NEXT: movl %ebx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: je .LBB4_9 -; X86-NEXT: # %bb.5: # %udiv-bb1 -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: xorl $127, %ebx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: je .LBB4_8 +; X86-NEXT: # %bb.2: # %udiv-bb1 +; X86-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) @@ -326,57 +324,49 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, 
{{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: xorb $127, %al ; X86-NEXT: movb %al, %ch ; X86-NEXT: andb $7, %ch ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $15, %al ; X86-NEXT: negb %al -; X86-NEXT: movsbl %al, %edi -; X86-NEXT: movl 144(%esp,%edi), %edx -; X86-NEXT: movl 148(%esp,%edi), %esi +; X86-NEXT: movsbl %al, %ebx +; X86-NEXT: movl 144(%esp,%ebx), %edx +; X86-NEXT: movl 148(%esp,%ebx), %edi ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %edi ; X86-NEXT: shll %cl, %edx ; X86-NEXT: notb %cl -; X86-NEXT: movl 140(%esp,%edi), %eax +; X86-NEXT: movl 140(%esp,%ebx), %eax ; X86-NEXT: movl %eax, %esi ; X86-NEXT: shrl %esi ; X86-NEXT: shrl %cl, %esi ; X86-NEXT: orl %edx, %esi ; X86-NEXT: movl %esi, %edx -; X86-NEXT: movl 136(%esp,%edi), %esi +; X86-NEXT: movl 136(%esp,%ebx), %esi ; X86-NEXT: movb %ch, %cl ; X86-NEXT: shldl %cl, %esi, %eax ; X86-NEXT: shll %cl, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl $1, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: adcl $0, %esi -; X86-NEXT: jae .LBB4_2 +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: jae .LBB4_3 ; X86-NEXT: # %bb.6: -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: xorl %ecx, %ecx +; 
X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: xorl %esi, %esi ; X86-NEXT: jmp .LBB4_7 -; X86-NEXT: .LBB4_1: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: jmp .LBB4_9 -; X86-NEXT: .LBB4_2: # %udiv-preheader -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: .LBB4_3: # %udiv-preheader +; X86-NEXT: movl (%esp), %esi # 4-byte Reload ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl (%esp), %esi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) @@ -384,37 +374,36 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: movb %bl, %ch ; X86-NEXT: andb $7, %ch ; X86-NEXT: movb %bl, %cl ; X86-NEXT: shrb $3, %cl ; X86-NEXT: andb $15, %cl -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movzbl %cl, %ebx -; X86-NEXT: movl 100(%esp,%ebx), %esi -; X86-NEXT: movl %esi, (%esp) # 4-byte Spill -; X86-NEXT: movl 96(%esp,%ebx), %edi +; X86-NEXT: movzbl %cl, %ebp +; X86-NEXT: movl 100(%esp,%ebp), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 96(%esp,%ebp), %ebx +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 
%edi, %ebp +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrdl %cl, %esi, %ebp -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl 88(%esp,%ebx), %esi -; X86-NEXT: movl 92(%esp,%ebx), %ebx -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: shrdl %cl, %esi, %edx +; X86-NEXT: movl 88(%esp,%ebp), %ebp +; X86-NEXT: movl 92(%esp,%eax), %esi +; X86-NEXT: movl %esi, %eax ; X86-NEXT: shrl %cl, %eax ; X86-NEXT: notb %cl -; X86-NEXT: addl %edi, %edi -; X86-NEXT: shll %cl, %edi -; X86-NEXT: orl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %ebx, %ebx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: orl %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrl %cl, (%esp) # 4-byte Folded Spill -; X86-NEXT: shrdl %cl, %ebx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: shrdl %cl, %esi, %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -424,115 +413,117 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: xorl %esi, %esi ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB4_3: # %udiv-do-while +; X86-NEXT: .LBB4_4: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: shldl $1, %ebp, (%esp) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: shldl $1, %ebp, %edx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: shldl $1, %ebx, %ebp -; X86-NEXT: shldl $1, %esi, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shldl $1, %ecx, %esi +; X86-NEXT: shldl $1, %edx, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl $1, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: shldl $1, %ebp, %edx +; X86-NEXT: shldl $1, %edi, %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: orl %eax, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: shldl $1, %edi, %ecx -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $1, %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NEXT: movl %ecx, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shldl $1, %ecx, %edi -; X86-NEXT: orl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: shldl $1, %ecx, %eax +; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl $1, %eax, %ecx +; 
X86-NEXT: orl %esi, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %ebp, %ecx +; X86-NEXT: addl %eax, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmpl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload +; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: sbbl %ebx, %ecx ; X86-NEXT: sarl $31, %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: andl %edi, %esi ; X86-NEXT: movl %ecx, %edi ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: subl %ecx, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %eax, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %edi, %edx -; X86-NEXT: movl %edx, %ebp +; X86-NEXT: subl %ecx, %ebp +; X86-NEXT: sbbl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X86-NEXT: sbbl %eax, (%esp) # 4-byte Folded Spill +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: movl (%esp), %edi # 4-byte Reload +; X86-NEXT: sbbl %esi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl $-1, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl $-1, %edi -; X86-NEXT: adcl $-1, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: adcl $-1, %esi +; X86-NEXT: adcl $-1, %ebx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %edx, %eax +; X86-NEXT: orl %ebx, %eax ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %edi, %ecx -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: jne .LBB4_3 -; X86-NEXT: # %bb.4: ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: jne .LBB4_4 +; X86-NEXT: # %bb.5: ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: .LBB4_7: # %udiv-loop-exit -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: shldl $1, %edx, %edi -; X86-NEXT: orl %ecx, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: shldl $1, 
%eax, %edx -; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: movl %esi, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: shldl $1, %esi, %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: addl %esi, %esi -; X86-NEXT: orl %ebp, %esi -; X86-NEXT: .LBB4_9: # %udiv-end -; X86-NEXT: xorl %ebx, %edi -; X86-NEXT: xorl %ebx, %edx -; X86-NEXT: xorl %ebx, %eax -; X86-NEXT: xorl %ebx, %esi -; X86-NEXT: subl %ebx, %esi +; X86-NEXT: orl %ebx, %esi +; X86-NEXT: .LBB4_8: # %udiv-end +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl %ecx, %edi +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: xorl %ecx, %esi +; X86-NEXT: subl %ecx, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %ebx, %eax +; X86-NEXT: sbbl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %ebx, %edx -; X86-NEXT: sbbl %ebx, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %esi, (%ecx) -; X86-NEXT: movl %eax, 4(%ecx) -; X86-NEXT: movl %edx, 8(%ecx) -; X86-NEXT: movl %edi, 12(%ecx) +; X86-NEXT: sbbl %ecx, %edx +; X86-NEXT: sbbl %ecx, %edi +; X86-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NEXT: movl %esi, (%ebp) +; X86-NEXT: movl %eax, 4(%ebp) +; X86-NEXT: movl %edx, 8(%ebp) +; X86-NEXT: movl %edi, 12(%ebp) ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %edx, %ebx @@ -541,7 +532,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: adcl $0, %ecx @@ -562,10 +553,10 @@ 
define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload ; X86-NEXT: imull %eax, %ecx ; X86-NEXT: mull %ebx -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: imull {{[0-9]+}}(%esp), %ebx ; X86-NEXT: addl %edx, %ebx ; X86-NEXT: addl %ecx, %ebx @@ -577,12 +568,12 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: mull %edx ; X86-NEXT: addl %edx, %ebp ; X86-NEXT: addl %ecx, %ebp -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload ; X86-NEXT: adcl %ebx, %ebp ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: subl (%esp), %edx # 4-byte Folded Reload +; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll index ebb95f16a723c..ae57d31167ba6 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll @@ -304,7 +304,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl 128(%esp,%eax), %esi ; X86-NEXT: movb %ch, %cl ; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shll %cl, %edx ; X86-NEXT: notb 
%cl ; X86-NEXT: movl 120(%esp,%eax), %ebp @@ -319,10 +319,10 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: shll %cl, %ebp ; X86-NEXT: addl $1, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: adcl $0, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $0, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl $0, %edi ; X86-NEXT: adcl $0, %ebx ; X86-NEXT: jae .LBB4_3 ; X86-NEXT: # %bb.6: @@ -331,14 +331,14 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %edx, %ebx ; X86-NEXT: jmp .LBB4_7 ; X86-NEXT: .LBB4_3: # %udiv-preheader -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -348,22 +348,23 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movb %al, %ch ; X86-NEXT: andb $7, %ch -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: # kill: def $al 
killed $al killed $eax ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $15, %al ; X86-NEXT: movzbl %al, %eax -; X86-NEXT: movl 80(%esp,%eax), %ebp +; X86-NEXT: movl 80(%esp,%eax), %edi +; X86-NEXT: movl %edi, (%esp) # 4-byte Spill ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl 76(%esp,%eax), %edi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edi, %ebx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrdl %cl, %ebp, %ebx +; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: shrdl %cl, %edx, %ebx ; X86-NEXT: movl 68(%esp,%eax), %esi -; X86-NEXT: movl 72(%esp,%eax), %edx -; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl 72(%esp,%eax), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shrl %cl, %eax ; X86-NEXT: notb %cl ; X86-NEXT: addl %edi, %edi @@ -371,8 +372,10 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: orl %eax, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrl %cl, %ebp -; X86-NEXT: shrdl %cl, %edx, %esi +; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: shrl %cl, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl %cl, %eax, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl $-1, %eax @@ -383,19 +386,20 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: adcl $-1, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: adcl $-1, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: adcl $-1, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl $0, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl (%esp), %esi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: .p2align 4, 0x90 ; X86-NEXT: .LBB4_4: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X86-NEXT: shldl $1, %ebx, %ebp +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $1, %ebx, %edx +; X86-NEXT: movl %edx, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: shldl $1, %ebx, (%esp) # 4-byte Folded Spill +; X86-NEXT: shldl $1, %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: shldl $1, %edx, %ebx ; X86-NEXT: shldl $1, %esi, %edx @@ -407,27 +411,25 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: shldl $1, %ecx, %eax ; X86-NEXT: orl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: shldl $1, %eax, %ecx +; X86-NEXT: shldl $1, %ebp, %ecx ; X86-NEXT: orl %edi, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %eax, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %ebp, %ebp +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: cmpl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sbbl %ebx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload +; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte 
Reload -; X86-NEXT: sbbl %ebp, %ecx +; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload ; X86-NEXT: sarl $31, %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: andl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %ecx, %edi ; X86-NEXT: andl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, %eax @@ -437,36 +439,35 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %eax, %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: sbbl %edi, %ebx -; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X86-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: sbbl %ebp, (%esp) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl $-1, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: adcl $-1, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: adcl $-1, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: adcl $-1, %edx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %edi, %eax -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %edx, %ecx -; 
X86-NEXT: movl (%esp), %ebp # 4-byte Reload +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edi, %ecx ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: jne .LBB4_4 ; X86-NEXT: # %bb.5: -; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: .LBB4_7: # %udiv-loop-exit -; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: shldl $1, %esi, %edx ; X86-NEXT: orl %eax, %edx ; X86-NEXT: shldl $1, %ebx, %esi diff --git a/llvm/test/CodeGen/X86/fast-isel-freeze.ll b/llvm/test/CodeGen/X86/fast-isel-freeze.ll index 8308a28e00a1d..031bccb018772 100644 --- a/llvm/test/CodeGen/X86/fast-isel-freeze.ll +++ b/llvm/test/CodeGen/X86/fast-isel-freeze.ll @@ -11,8 +11,8 @@ define i32 @freeze(i32 %t) { ; ; FAST-LABEL: freeze: ; FAST: # %bb.0: -; FAST-NEXT: movl $10, %eax -; FAST-NEXT: xorl %edi, %eax +; FAST-NEXT: movl %edi, %eax +; FAST-NEXT: xorl $10, %eax ; FAST-NEXT: retq %1 = freeze i32 %t %2 = freeze i32 10 diff --git a/llvm/test/CodeGen/X86/foldimmediate-size.ll b/llvm/test/CodeGen/X86/foldimmediate-size.ll new file mode 100644 index 0000000000000..8d4c0a462d02d --- /dev/null +++ b/llvm/test/CodeGen/X86/foldimmediate-size.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s + +; When optimize for size, the constant $858993459 is moved into a register, +; 
and use that register in following two andl instructions. + +define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize { +; CHECK-LABEL: cnt32_optsize: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl %eax +; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-NEXT: subl %eax, %edi +; CHECK-NEXT: movl $858993459, %eax # imm = 0x33333333 +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: andl %eax, %ecx +; CHECK-NEXT: shrl $2, %edi +; CHECK-NEXT: andl %eax, %edi +; CHECK-NEXT: addl %ecx, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 +; CHECK-NEXT: shrl $24, %eax +; CHECK-NEXT: retq + %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) + ret i32 %cnt +} + +; When optimize for speed, the constant $858993459 can be directly folded into +; two andl instructions. + +define i32 @cnt32_optspeed(i32 %x) nounwind readnone { +; CHECK-LABEL: cnt32_optspeed: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl %eax +; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-NEXT: subl %eax, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $858993459, %eax # imm = 0x33333333 +; CHECK-NEXT: shrl $2, %edi +; CHECK-NEXT: andl $858993459, %edi # imm = 0x33333333 +; CHECK-NEXT: addl %eax, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 +; CHECK-NEXT: shrl $24, %eax +; CHECK-NEXT: retq + %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) + ret i32 %cnt +} + +declare i32 @llvm.ctpop.i32(i32) nounwind readnone diff --git a/llvm/test/CodeGen/X86/foldimmediate.mir b/llvm/test/CodeGen/X86/foldimmediate.mir new file mode 100644 index 0000000000000..5fd5ae9c1ca9f --- /dev/null +++ b/llvm/test/CodeGen/X86/foldimmediate.mir 
@@ -0,0 +1,143 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=x86_64-- -run-pass=peephole-opt %s -o - | FileCheck %s +--- | + define void @foldImmediate() { ret void } +... +--- +# Check that immediates can be folded into ALU instructions. +name: foldImmediate +registers: + - { id: 0, class: gr32 } + - { id: 1, class: gr32 } + - { id: 2, class: gr32 } + - { id: 3, class: gr32 } + - { id: 4, class: gr32 } + - { id: 5, class: gr32 } + - { id: 6, class: gr32 } + - { id: 7, class: gr64 } + - { id: 8, class: gr64 } + - { id: 9, class: gr64 } + - { id: 10, class: gr64 } + - { id: 11, class: gr64 } + - { id: 12, class: gr64 } + - { id: 13, class: gr64 } + - { id: 14, class: gr64 } + - { id: 15, class: gr64 } + - { id: 16, class: gr32 } + - { id: 17, class: gr64 } + - { id: 18, class: gr32 } + +body: | + bb.0: + liveins: $rdi, $rsi + + ; CHECK-LABEL: name: foldImmediate + ; CHECK: liveins: $rdi, $rsi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 81 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[ADD32ri:%[0-9]+]]:gr32 = ADD32ri [[COPY]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit [[ADD32ri]] + ; CHECK-NEXT: [[SUB32ri:%[0-9]+]]:gr32 = SUB32ri [[COPY]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit [[SUB32ri]] + ; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit [[AND32ri]] + ; CHECK-NEXT: [[OR32ri:%[0-9]+]]:gr32 = OR32ri [[COPY]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit [[OR32ri]] + ; CHECK-NEXT: [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri [[COPY]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit [[XOR32ri]] + ; CHECK-NEXT: TEST32ri [[COPY]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit $eflags + ; CHECK-NEXT: CMP32ri [[COPY]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit $eflags + ; CHECK-NEXT: [[ADC32ri:%[0-9]+]]:gr32 = ADC32ri 
[[COPY]], 81, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: NOOP implicit [[ADC32ri]] + ; CHECK-NEXT: [[SBB32ri:%[0-9]+]]:gr32 = SBB32ri [[COPY]], 81, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: NOOP implicit [[SBB32ri]] + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[MOV32ri]], %subreg.sub_32bit + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[ADD64ri32_:%[0-9]+]]:gr64 = ADD64ri32 [[COPY1]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit [[ADD64ri32_]] + ; CHECK-NEXT: [[SUB64ri32_:%[0-9]+]]:gr64 = SUB64ri32 [[COPY1]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit [[SUB64ri32_]] + ; CHECK-NEXT: [[AND64ri32_:%[0-9]+]]:gr64 = AND64ri32 [[COPY1]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit [[AND64ri32_]] + ; CHECK-NEXT: [[OR64ri32_:%[0-9]+]]:gr64 = OR64ri32 [[COPY1]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit [[OR64ri32_]] + ; CHECK-NEXT: [[XOR64ri32_:%[0-9]+]]:gr64 = XOR64ri32 [[COPY1]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit [[XOR64ri32_]] + ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 81 + ; CHECK-NEXT: NOOP implicit [[MOV32ri64_]] + ; CHECK-NEXT: TEST64ri32 [[COPY1]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit $eflags + ; CHECK-NEXT: [[ADC64ri32_:%[0-9]+]]:gr64 = ADC64ri32 [[COPY1]], 81, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: NOOP implicit [[ADC64ri32_]] + ; CHECK-NEXT: [[SBB64ri32_:%[0-9]+]]:gr64 = SBB64ri32 [[COPY1]], 81, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: NOOP implicit [[SBB64ri32_]] + ; CHECK-NEXT: CMP64ri32 [[COPY1]], 81, implicit-def $eflags + ; CHECK-NEXT: NOOP implicit $eflags + ; CHECK-NEXT: CMP64rr [[SUBREG_TO_REG]], [[COPY1]], implicit-def $eflags + ; CHECK-NEXT: NOOP implicit $eflags + %0 = MOV32ri 81 + %1 = COPY $edi + %2 = ADD32rr %0, %1, implicit-def $eflags + NOOP implicit %2 + + %3 = SUB32rr %1, %0, implicit-def $eflags + NOOP implicit %3 + + %4 = AND32rr %0, %1, 
implicit-def $eflags + NOOP implicit %4 + + %5 = OR32rr %0, %1, implicit-def $eflags + NOOP implicit %5 + + %6 = XOR32rr %0, %1, implicit-def $eflags + NOOP implicit %6 + + TEST32rr %0, %1, implicit-def $eflags + NOOP implicit $eflags + + CMP32rr %1, %0, implicit-def $eflags + NOOP implicit $eflags + + %16 = ADC32rr %0, %1, implicit-def $eflags, implicit $eflags + NOOP implicit %16 + + %18 = SBB32rr %1, %0, implicit-def $eflags, implicit $eflags + NOOP implicit %18 + + %7 = SUBREG_TO_REG 0, killed %0:gr32, %subreg.sub_32bit + %8 = COPY $rsi + %9 = ADD64rr %7, %8, implicit-def $eflags + NOOP implicit %9 + + %10 = SUB64rr %8, %7, implicit-def $eflags + NOOP implicit %10 + + %11 = AND64rr %8, %7, implicit-def $eflags + NOOP implicit %11 + + %12 = OR64rr %8, %7, implicit-def $eflags + NOOP implicit %12 + + %13 = XOR64rr %8, %7, implicit-def $eflags + NOOP implicit %13 + + %14 = COPY %7 + NOOP implicit %14 + + TEST64rr %8, %7, implicit-def $eflags + NOOP implicit $eflags + + %15 = ADC64rr %8, %7, implicit-def $eflags, implicit $eflags + NOOP implicit %15 + + %17 = SBB64rr %8, %7, implicit-def $eflags, implicit $eflags + NOOP implicit %17 + + CMP64rr %8, %7, implicit-def $eflags + NOOP implicit $eflags + CMP64rr %7, %8, implicit-def $eflags + NOOP implicit $eflags +... 
diff --git a/llvm/test/CodeGen/X86/pcsections-atomics.ll b/llvm/test/CodeGen/X86/pcsections-atomics.ll index e6604c957f1fa..cfc9d50763af4 100644 --- a/llvm/test/CodeGen/X86/pcsections-atomics.ll +++ b/llvm/test/CodeGen/X86/pcsections-atomics.ll @@ -2148,14 +2148,17 @@ define void @atomic8_cas_monotonic(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movb $1, %cl -; O1-NEXT: movb $42, %al ; O1-NEXT: .Lpcsection65: -; O1-NEXT: lock cmpxchgb %cl, (%rdi) ; O1-NEXT: movb $42, %al ; O1-NEXT: .Lpcsection66: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) -; O1-NEXT: movb $42, %al ; O1-NEXT: .Lpcsection67: +; O1-NEXT: movb $42, %al +; O1-NEXT: .Lpcsection68: +; O1-NEXT: lock cmpxchgb %cl, (%rdi) +; O1-NEXT: .Lpcsection69: +; O1-NEXT: movb $42, %al +; O1-NEXT: .Lpcsection70: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2164,14 +2167,17 @@ define void @atomic8_cas_monotonic(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movb $1, %cl -; O2-NEXT: movb $42, %al ; O2-NEXT: .Lpcsection65: -; O2-NEXT: lock cmpxchgb %cl, (%rdi) ; O2-NEXT: movb $42, %al ; O2-NEXT: .Lpcsection66: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) -; O2-NEXT: movb $42, %al ; O2-NEXT: .Lpcsection67: +; O2-NEXT: movb $42, %al +; O2-NEXT: .Lpcsection68: +; O2-NEXT: lock cmpxchgb %cl, (%rdi) +; O2-NEXT: .Lpcsection69: +; O2-NEXT: movb $42, %al +; O2-NEXT: .Lpcsection70: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2180,14 +2186,17 @@ define void @atomic8_cas_monotonic(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movb $1, %cl -; O3-NEXT: movb $42, %al ; O3-NEXT: .Lpcsection65: -; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movb $42, %al ; O3-NEXT: .Lpcsection66: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) -; O3-NEXT: movb $42, %al ; O3-NEXT: .Lpcsection67: +; O3-NEXT: movb $42, %al +; O3-NEXT: .Lpcsection68: +; O3-NEXT: lock cmpxchgb %cl, (%rdi) +; O3-NEXT: 
.Lpcsection69: +; O3-NEXT: movb $42, %al +; O3-NEXT: .Lpcsection70: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2226,14 +2235,17 @@ define void @atomic8_cas_acquire(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movb $1, %cl +; O1-NEXT: .Lpcsection71: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection68: +; O1-NEXT: .Lpcsection72: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) +; O1-NEXT: .Lpcsection73: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection69: +; O1-NEXT: .Lpcsection74: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) +; O1-NEXT: .Lpcsection75: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection70: +; O1-NEXT: .Lpcsection76: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2242,14 +2254,17 @@ define void @atomic8_cas_acquire(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movb $1, %cl +; O2-NEXT: .Lpcsection71: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection68: +; O2-NEXT: .Lpcsection72: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) +; O2-NEXT: .Lpcsection73: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection69: +; O2-NEXT: .Lpcsection74: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) +; O2-NEXT: .Lpcsection75: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection70: +; O2-NEXT: .Lpcsection76: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2258,14 +2273,17 @@ define void @atomic8_cas_acquire(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movb $1, %cl +; O3-NEXT: .Lpcsection71: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection68: +; O3-NEXT: .Lpcsection72: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) +; O3-NEXT: .Lpcsection73: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection69: +; O3-NEXT: .Lpcsection74: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) +; O3-NEXT: .Lpcsection75: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection70: +; O3-NEXT: .Lpcsection76: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movq $1, 
foo(%rip) ; O3-NEXT: retq @@ -2304,14 +2322,17 @@ define void @atomic8_cas_release(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movb $1, %cl +; O1-NEXT: .Lpcsection77: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection71: +; O1-NEXT: .Lpcsection78: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) +; O1-NEXT: .Lpcsection79: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection72: +; O1-NEXT: .Lpcsection80: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) +; O1-NEXT: .Lpcsection81: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection73: +; O1-NEXT: .Lpcsection82: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2320,14 +2341,17 @@ define void @atomic8_cas_release(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movb $1, %cl +; O2-NEXT: .Lpcsection77: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection71: +; O2-NEXT: .Lpcsection78: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) +; O2-NEXT: .Lpcsection79: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection72: +; O2-NEXT: .Lpcsection80: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) +; O2-NEXT: .Lpcsection81: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection73: +; O2-NEXT: .Lpcsection82: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2336,14 +2360,17 @@ define void @atomic8_cas_release(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movb $1, %cl +; O3-NEXT: .Lpcsection77: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection71: +; O3-NEXT: .Lpcsection78: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) +; O3-NEXT: .Lpcsection79: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection72: +; O3-NEXT: .Lpcsection80: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) +; O3-NEXT: .Lpcsection81: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection73: +; O3-NEXT: .Lpcsection82: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2382,14 +2409,17 @@ define void @atomic8_cas_acq_rel(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq 
foo(%rip), %rax ; O1-NEXT: movb $1, %cl +; O1-NEXT: .Lpcsection83: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection74: +; O1-NEXT: .Lpcsection84: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) +; O1-NEXT: .Lpcsection85: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection75: +; O1-NEXT: .Lpcsection86: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) +; O1-NEXT: .Lpcsection87: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection76: +; O1-NEXT: .Lpcsection88: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2398,14 +2428,17 @@ define void @atomic8_cas_acq_rel(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movb $1, %cl +; O2-NEXT: .Lpcsection83: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection74: +; O2-NEXT: .Lpcsection84: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) +; O2-NEXT: .Lpcsection85: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection75: +; O2-NEXT: .Lpcsection86: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) +; O2-NEXT: .Lpcsection87: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection76: +; O2-NEXT: .Lpcsection88: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2414,14 +2447,17 @@ define void @atomic8_cas_acq_rel(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movb $1, %cl +; O3-NEXT: .Lpcsection83: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection74: +; O3-NEXT: .Lpcsection84: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) +; O3-NEXT: .Lpcsection85: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection75: +; O3-NEXT: .Lpcsection86: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) +; O3-NEXT: .Lpcsection87: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection76: +; O3-NEXT: .Lpcsection88: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2460,14 +2496,17 @@ define void @atomic8_cas_seq_cst(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movb $1, %cl +; O1-NEXT: .Lpcsection89: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection77: +; O1-NEXT: 
.Lpcsection90: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) +; O1-NEXT: .Lpcsection91: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection78: +; O1-NEXT: .Lpcsection92: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) +; O1-NEXT: .Lpcsection93: ; O1-NEXT: movb $42, %al -; O1-NEXT: .Lpcsection79: +; O1-NEXT: .Lpcsection94: ; O1-NEXT: lock cmpxchgb %cl, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2476,14 +2515,17 @@ define void @atomic8_cas_seq_cst(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movb $1, %cl +; O2-NEXT: .Lpcsection89: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection77: +; O2-NEXT: .Lpcsection90: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) +; O2-NEXT: .Lpcsection91: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection78: +; O2-NEXT: .Lpcsection92: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) +; O2-NEXT: .Lpcsection93: ; O2-NEXT: movb $42, %al -; O2-NEXT: .Lpcsection79: +; O2-NEXT: .Lpcsection94: ; O2-NEXT: lock cmpxchgb %cl, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2492,14 +2534,17 @@ define void @atomic8_cas_seq_cst(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movb $1, %cl +; O3-NEXT: .Lpcsection89: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection77: +; O3-NEXT: .Lpcsection90: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) +; O3-NEXT: .Lpcsection91: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection78: +; O3-NEXT: .Lpcsection92: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) +; O3-NEXT: .Lpcsection93: ; O3-NEXT: movb $42, %al -; O3-NEXT: .Lpcsection79: +; O3-NEXT: .Lpcsection94: ; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2524,7 +2569,7 @@ define i16 @atomic16_load_unordered(ptr %a) { ; O1-LABEL: atomic16_load_unordered: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection80: +; O1-NEXT: .Lpcsection95: ; O1-NEXT: movzwl (%rdi), %eax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2532,7 +2577,7 @@ define i16 @atomic16_load_unordered(ptr %a) { ; O2-LABEL: 
atomic16_load_unordered: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection80: +; O2-NEXT: .Lpcsection95: ; O2-NEXT: movzwl (%rdi), %eax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2540,7 +2585,7 @@ define i16 @atomic16_load_unordered(ptr %a) { ; O3-LABEL: atomic16_load_unordered: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection80: +; O3-NEXT: .Lpcsection95: ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2563,7 +2608,7 @@ define i16 @atomic16_load_monotonic(ptr %a) { ; O1-LABEL: atomic16_load_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection81: +; O1-NEXT: .Lpcsection96: ; O1-NEXT: movzwl (%rdi), %eax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2571,7 +2616,7 @@ define i16 @atomic16_load_monotonic(ptr %a) { ; O2-LABEL: atomic16_load_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection81: +; O2-NEXT: .Lpcsection96: ; O2-NEXT: movzwl (%rdi), %eax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2579,7 +2624,7 @@ define i16 @atomic16_load_monotonic(ptr %a) { ; O3-LABEL: atomic16_load_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection81: +; O3-NEXT: .Lpcsection96: ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2602,7 +2647,7 @@ define i16 @atomic16_load_acquire(ptr %a) { ; O1-LABEL: atomic16_load_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection82: +; O1-NEXT: .Lpcsection97: ; O1-NEXT: movzwl (%rdi), %eax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2610,7 +2655,7 @@ define i16 @atomic16_load_acquire(ptr %a) { ; O2-LABEL: atomic16_load_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection82: +; O2-NEXT: .Lpcsection97: ; O2-NEXT: movzwl (%rdi), %eax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2618,7 +2663,7 @@ define i16 
@atomic16_load_acquire(ptr %a) { ; O3-LABEL: atomic16_load_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection82: +; O3-NEXT: .Lpcsection97: ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2641,7 +2686,7 @@ define i16 @atomic16_load_seq_cst(ptr %a) { ; O1-LABEL: atomic16_load_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection83: +; O1-NEXT: .Lpcsection98: ; O1-NEXT: movzwl (%rdi), %eax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2649,7 +2694,7 @@ define i16 @atomic16_load_seq_cst(ptr %a) { ; O2-LABEL: atomic16_load_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection83: +; O2-NEXT: .Lpcsection98: ; O2-NEXT: movzwl (%rdi), %eax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2657,7 +2702,7 @@ define i16 @atomic16_load_seq_cst(ptr %a) { ; O3-LABEL: atomic16_load_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection83: +; O3-NEXT: .Lpcsection98: ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2680,7 +2725,7 @@ define void @atomic16_store_unordered(ptr %a) { ; O1-LABEL: atomic16_store_unordered: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection84: +; O1-NEXT: .Lpcsection99: ; O1-NEXT: movw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2688,7 +2733,7 @@ define void @atomic16_store_unordered(ptr %a) { ; O2-LABEL: atomic16_store_unordered: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection84: +; O2-NEXT: .Lpcsection99: ; O2-NEXT: movw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2696,7 +2741,7 @@ define void @atomic16_store_unordered(ptr %a) { ; O3-LABEL: atomic16_store_unordered: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection84: +; O3-NEXT: .Lpcsection99: ; O3-NEXT: movw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ 
-2719,7 +2764,7 @@ define void @atomic16_store_monotonic(ptr %a) { ; O1-LABEL: atomic16_store_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection85: +; O1-NEXT: .Lpcsection100: ; O1-NEXT: movw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2727,7 +2772,7 @@ define void @atomic16_store_monotonic(ptr %a) { ; O2-LABEL: atomic16_store_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection85: +; O2-NEXT: .Lpcsection100: ; O2-NEXT: movw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2735,7 +2780,7 @@ define void @atomic16_store_monotonic(ptr %a) { ; O3-LABEL: atomic16_store_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection85: +; O3-NEXT: .Lpcsection100: ; O3-NEXT: movw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2758,7 +2803,7 @@ define void @atomic16_store_release(ptr %a) { ; O1-LABEL: atomic16_store_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection86: +; O1-NEXT: .Lpcsection101: ; O1-NEXT: movw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2766,7 +2811,7 @@ define void @atomic16_store_release(ptr %a) { ; O2-LABEL: atomic16_store_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection86: +; O2-NEXT: .Lpcsection101: ; O2-NEXT: movw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2774,7 +2819,7 @@ define void @atomic16_store_release(ptr %a) { ; O3-LABEL: atomic16_store_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection86: +; O3-NEXT: .Lpcsection101: ; O3-NEXT: movw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2799,7 +2844,7 @@ define void @atomic16_store_seq_cst(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection87: +; O1-NEXT: .Lpcsection102: ; O1-NEXT: xchgw %ax, (%rdi) ; O1-NEXT: movq $1, 
foo(%rip) ; O1-NEXT: retq @@ -2808,7 +2853,7 @@ define void @atomic16_store_seq_cst(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection87: +; O2-NEXT: .Lpcsection102: ; O2-NEXT: xchgw %ax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2817,7 +2862,7 @@ define void @atomic16_store_seq_cst(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection87: +; O3-NEXT: .Lpcsection102: ; O3-NEXT: xchgw %ax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2842,7 +2887,7 @@ define void @atomic16_xchg_monotonic(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection88: +; O1-NEXT: .Lpcsection103: ; O1-NEXT: xchgw %ax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2851,7 +2896,7 @@ define void @atomic16_xchg_monotonic(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection88: +; O2-NEXT: .Lpcsection103: ; O2-NEXT: xchgw %ax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2860,7 +2905,7 @@ define void @atomic16_xchg_monotonic(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection88: +; O3-NEXT: .Lpcsection103: ; O3-NEXT: xchgw %ax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2883,7 +2928,7 @@ define void @atomic16_add_monotonic(ptr %a) { ; O1-LABEL: atomic16_add_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection89: +; O1-NEXT: .Lpcsection104: ; O1-NEXT: lock addw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2891,7 +2936,7 @@ define void @atomic16_add_monotonic(ptr %a) { ; O2-LABEL: atomic16_add_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection89: +; O2-NEXT: .Lpcsection104: ; O2-NEXT: lock addw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; 
O2-NEXT: retq @@ -2899,7 +2944,7 @@ define void @atomic16_add_monotonic(ptr %a) { ; O3-LABEL: atomic16_add_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection89: +; O3-NEXT: .Lpcsection104: ; O3-NEXT: lock addw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2922,7 +2967,7 @@ define void @atomic16_sub_monotonic(ptr %a) { ; O1-LABEL: atomic16_sub_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection90: +; O1-NEXT: .Lpcsection105: ; O1-NEXT: lock subw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2930,7 +2975,7 @@ define void @atomic16_sub_monotonic(ptr %a) { ; O2-LABEL: atomic16_sub_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection90: +; O2-NEXT: .Lpcsection105: ; O2-NEXT: lock subw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2938,7 +2983,7 @@ define void @atomic16_sub_monotonic(ptr %a) { ; O3-LABEL: atomic16_sub_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection90: +; O3-NEXT: .Lpcsection105: ; O3-NEXT: lock subw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -2961,7 +3006,7 @@ define void @atomic16_and_monotonic(ptr %a) { ; O1-LABEL: atomic16_and_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection91: +; O1-NEXT: .Lpcsection106: ; O1-NEXT: lock andw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -2969,7 +3014,7 @@ define void @atomic16_and_monotonic(ptr %a) { ; O2-LABEL: atomic16_and_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection91: +; O2-NEXT: .Lpcsection106: ; O2-NEXT: lock andw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -2977,7 +3022,7 @@ define void @atomic16_and_monotonic(ptr %a) { ; O3-LABEL: atomic16_and_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection91: +; O3-NEXT: .Lpcsection106: ; 
O3-NEXT: lock andw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3000,7 +3045,7 @@ define void @atomic16_or_monotonic(ptr %a) { ; O1-LABEL: atomic16_or_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection92: +; O1-NEXT: .Lpcsection107: ; O1-NEXT: lock orw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3008,7 +3053,7 @@ define void @atomic16_or_monotonic(ptr %a) { ; O2-LABEL: atomic16_or_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection92: +; O2-NEXT: .Lpcsection107: ; O2-NEXT: lock orw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3016,7 +3061,7 @@ define void @atomic16_or_monotonic(ptr %a) { ; O3-LABEL: atomic16_or_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection92: +; O3-NEXT: .Lpcsection107: ; O3-NEXT: lock orw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3039,7 +3084,7 @@ define void @atomic16_xor_monotonic(ptr %a) { ; O1-LABEL: atomic16_xor_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection93: +; O1-NEXT: .Lpcsection108: ; O1-NEXT: lock xorw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3047,7 +3092,7 @@ define void @atomic16_xor_monotonic(ptr %a) { ; O2-LABEL: atomic16_xor_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection93: +; O2-NEXT: .Lpcsection108: ; O2-NEXT: lock xorw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3055,7 +3100,7 @@ define void @atomic16_xor_monotonic(ptr %a) { ; O3-LABEL: atomic16_xor_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection93: +; O3-NEXT: .Lpcsection108: ; O3-NEXT: lock xorw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3104,23 +3149,23 @@ define void @atomic16_nand_monotonic(ptr %a) { ; O1-LABEL: atomic16_nand_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; 
O1-NEXT: .Lpcsection94: +; O1-NEXT: .Lpcsection109: ; O1-NEXT: movzwl (%rdi), %eax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB64_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection95: +; O1-NEXT: .Lpcsection110: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection96: +; O1-NEXT: .Lpcsection111: ; O1-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O1-NEXT: .Lpcsection97: +; O1-NEXT: .Lpcsection112: ; O1-NEXT: # kill: def $ax killed $ax killed $eax -; O1-NEXT: .Lpcsection98: +; O1-NEXT: .Lpcsection113: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) -; O1-NEXT: .Lpcsection99: +; O1-NEXT: .Lpcsection114: ; O1-NEXT: # kill: def $ax killed $ax def $eax -; O1-NEXT: .Lpcsection100: +; O1-NEXT: .Lpcsection115: ; O1-NEXT: jne .LBB64_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -3129,23 +3174,23 @@ define void @atomic16_nand_monotonic(ptr %a) { ; O2-LABEL: atomic16_nand_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection94: +; O2-NEXT: .Lpcsection109: ; O2-NEXT: movzwl (%rdi), %eax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB64_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection95: +; O2-NEXT: .Lpcsection110: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection96: +; O2-NEXT: .Lpcsection111: ; O2-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O2-NEXT: .Lpcsection97: +; O2-NEXT: .Lpcsection112: ; O2-NEXT: # kill: def $ax killed $ax killed $eax -; O2-NEXT: .Lpcsection98: +; O2-NEXT: .Lpcsection113: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) -; O2-NEXT: .Lpcsection99: +; O2-NEXT: .Lpcsection114: ; O2-NEXT: # kill: def $ax killed $ax def $eax -; O2-NEXT: .Lpcsection100: +; O2-NEXT: .Lpcsection115: ; O2-NEXT: jne .LBB64_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -3154,23 +3199,23 @@ define void @atomic16_nand_monotonic(ptr %a) { ; O3-LABEL: atomic16_nand_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: 
movq foo(%rip), %rax -; O3-NEXT: .Lpcsection94: +; O3-NEXT: .Lpcsection109: ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB64_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection95: +; O3-NEXT: .Lpcsection110: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection96: +; O3-NEXT: .Lpcsection111: ; O3-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O3-NEXT: .Lpcsection97: +; O3-NEXT: .Lpcsection112: ; O3-NEXT: # kill: def $ax killed $ax killed $eax -; O3-NEXT: .Lpcsection98: +; O3-NEXT: .Lpcsection113: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) -; O3-NEXT: .Lpcsection99: +; O3-NEXT: .Lpcsection114: ; O3-NEXT: # kill: def $ax killed $ax def $eax -; O3-NEXT: .Lpcsection100: +; O3-NEXT: .Lpcsection115: ; O3-NEXT: jne .LBB64_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -3196,7 +3241,7 @@ define void @atomic16_xchg_acquire(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection101: +; O1-NEXT: .Lpcsection116: ; O1-NEXT: xchgw %ax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3205,7 +3250,7 @@ define void @atomic16_xchg_acquire(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection101: +; O2-NEXT: .Lpcsection116: ; O2-NEXT: xchgw %ax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3214,7 +3259,7 @@ define void @atomic16_xchg_acquire(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection101: +; O3-NEXT: .Lpcsection116: ; O3-NEXT: xchgw %ax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3237,7 +3282,7 @@ define void @atomic16_add_acquire(ptr %a) { ; O1-LABEL: atomic16_add_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection102: +; O1-NEXT: .Lpcsection117: ; O1-NEXT: lock addw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ 
-3245,7 +3290,7 @@ define void @atomic16_add_acquire(ptr %a) { ; O2-LABEL: atomic16_add_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection102: +; O2-NEXT: .Lpcsection117: ; O2-NEXT: lock addw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3253,7 +3298,7 @@ define void @atomic16_add_acquire(ptr %a) { ; O3-LABEL: atomic16_add_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection102: +; O3-NEXT: .Lpcsection117: ; O3-NEXT: lock addw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3276,7 +3321,7 @@ define void @atomic16_sub_acquire(ptr %a) { ; O1-LABEL: atomic16_sub_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection103: +; O1-NEXT: .Lpcsection118: ; O1-NEXT: lock subw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3284,7 +3329,7 @@ define void @atomic16_sub_acquire(ptr %a) { ; O2-LABEL: atomic16_sub_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection103: +; O2-NEXT: .Lpcsection118: ; O2-NEXT: lock subw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3292,7 +3337,7 @@ define void @atomic16_sub_acquire(ptr %a) { ; O3-LABEL: atomic16_sub_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection103: +; O3-NEXT: .Lpcsection118: ; O3-NEXT: lock subw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3315,7 +3360,7 @@ define void @atomic16_and_acquire(ptr %a) { ; O1-LABEL: atomic16_and_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection104: +; O1-NEXT: .Lpcsection119: ; O1-NEXT: lock andw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3323,7 +3368,7 @@ define void @atomic16_and_acquire(ptr %a) { ; O2-LABEL: atomic16_and_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection104: +; O2-NEXT: .Lpcsection119: ; O2-NEXT: lock andw $42, (%rdi) ; O2-NEXT: 
movq $1, foo(%rip) ; O2-NEXT: retq @@ -3331,7 +3376,7 @@ define void @atomic16_and_acquire(ptr %a) { ; O3-LABEL: atomic16_and_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection104: +; O3-NEXT: .Lpcsection119: ; O3-NEXT: lock andw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3354,7 +3399,7 @@ define void @atomic16_or_acquire(ptr %a) { ; O1-LABEL: atomic16_or_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection105: +; O1-NEXT: .Lpcsection120: ; O1-NEXT: lock orw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3362,7 +3407,7 @@ define void @atomic16_or_acquire(ptr %a) { ; O2-LABEL: atomic16_or_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection105: +; O2-NEXT: .Lpcsection120: ; O2-NEXT: lock orw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3370,7 +3415,7 @@ define void @atomic16_or_acquire(ptr %a) { ; O3-LABEL: atomic16_or_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection105: +; O3-NEXT: .Lpcsection120: ; O3-NEXT: lock orw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3393,7 +3438,7 @@ define void @atomic16_xor_acquire(ptr %a) { ; O1-LABEL: atomic16_xor_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection106: +; O1-NEXT: .Lpcsection121: ; O1-NEXT: lock xorw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3401,7 +3446,7 @@ define void @atomic16_xor_acquire(ptr %a) { ; O2-LABEL: atomic16_xor_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection106: +; O2-NEXT: .Lpcsection121: ; O2-NEXT: lock xorw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3409,7 +3454,7 @@ define void @atomic16_xor_acquire(ptr %a) { ; O3-LABEL: atomic16_xor_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection106: +; O3-NEXT: .Lpcsection121: ; O3-NEXT: lock 
xorw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3458,23 +3503,23 @@ define void @atomic16_nand_acquire(ptr %a) { ; O1-LABEL: atomic16_nand_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection107: +; O1-NEXT: .Lpcsection122: ; O1-NEXT: movzwl (%rdi), %eax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB71_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection108: +; O1-NEXT: .Lpcsection123: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection109: +; O1-NEXT: .Lpcsection124: ; O1-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O1-NEXT: .Lpcsection110: +; O1-NEXT: .Lpcsection125: ; O1-NEXT: # kill: def $ax killed $ax killed $eax -; O1-NEXT: .Lpcsection111: +; O1-NEXT: .Lpcsection126: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) -; O1-NEXT: .Lpcsection112: +; O1-NEXT: .Lpcsection127: ; O1-NEXT: # kill: def $ax killed $ax def $eax -; O1-NEXT: .Lpcsection113: +; O1-NEXT: .Lpcsection128: ; O1-NEXT: jne .LBB71_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -3483,23 +3528,23 @@ define void @atomic16_nand_acquire(ptr %a) { ; O2-LABEL: atomic16_nand_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection107: +; O2-NEXT: .Lpcsection122: ; O2-NEXT: movzwl (%rdi), %eax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB71_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection108: +; O2-NEXT: .Lpcsection123: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection109: +; O2-NEXT: .Lpcsection124: ; O2-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O2-NEXT: .Lpcsection110: +; O2-NEXT: .Lpcsection125: ; O2-NEXT: # kill: def $ax killed $ax killed $eax -; O2-NEXT: .Lpcsection111: +; O2-NEXT: .Lpcsection126: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) -; O2-NEXT: .Lpcsection112: +; O2-NEXT: .Lpcsection127: ; O2-NEXT: # kill: def $ax killed $ax def $eax -; O2-NEXT: .Lpcsection113: +; O2-NEXT: .Lpcsection128: 
; O2-NEXT: jne .LBB71_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -3508,23 +3553,23 @@ define void @atomic16_nand_acquire(ptr %a) { ; O3-LABEL: atomic16_nand_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection107: +; O3-NEXT: .Lpcsection122: ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB71_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection108: +; O3-NEXT: .Lpcsection123: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection109: +; O3-NEXT: .Lpcsection124: ; O3-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O3-NEXT: .Lpcsection110: +; O3-NEXT: .Lpcsection125: ; O3-NEXT: # kill: def $ax killed $ax killed $eax -; O3-NEXT: .Lpcsection111: +; O3-NEXT: .Lpcsection126: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) -; O3-NEXT: .Lpcsection112: +; O3-NEXT: .Lpcsection127: ; O3-NEXT: # kill: def $ax killed $ax def $eax -; O3-NEXT: .Lpcsection113: +; O3-NEXT: .Lpcsection128: ; O3-NEXT: jne .LBB71_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -3550,7 +3595,7 @@ define void @atomic16_xchg_release(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection114: +; O1-NEXT: .Lpcsection129: ; O1-NEXT: xchgw %ax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3559,7 +3604,7 @@ define void @atomic16_xchg_release(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection114: +; O2-NEXT: .Lpcsection129: ; O2-NEXT: xchgw %ax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3568,7 +3613,7 @@ define void @atomic16_xchg_release(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection114: +; O3-NEXT: .Lpcsection129: ; O3-NEXT: xchgw %ax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3591,7 +3636,7 @@ define void 
@atomic16_add_release(ptr %a) { ; O1-LABEL: atomic16_add_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection115: +; O1-NEXT: .Lpcsection130: ; O1-NEXT: lock addw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3599,7 +3644,7 @@ define void @atomic16_add_release(ptr %a) { ; O2-LABEL: atomic16_add_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection115: +; O2-NEXT: .Lpcsection130: ; O2-NEXT: lock addw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3607,7 +3652,7 @@ define void @atomic16_add_release(ptr %a) { ; O3-LABEL: atomic16_add_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection115: +; O3-NEXT: .Lpcsection130: ; O3-NEXT: lock addw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3630,7 +3675,7 @@ define void @atomic16_sub_release(ptr %a) { ; O1-LABEL: atomic16_sub_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection116: +; O1-NEXT: .Lpcsection131: ; O1-NEXT: lock subw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3638,7 +3683,7 @@ define void @atomic16_sub_release(ptr %a) { ; O2-LABEL: atomic16_sub_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection116: +; O2-NEXT: .Lpcsection131: ; O2-NEXT: lock subw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3646,7 +3691,7 @@ define void @atomic16_sub_release(ptr %a) { ; O3-LABEL: atomic16_sub_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection116: +; O3-NEXT: .Lpcsection131: ; O3-NEXT: lock subw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3669,7 +3714,7 @@ define void @atomic16_and_release(ptr %a) { ; O1-LABEL: atomic16_and_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection117: +; O1-NEXT: .Lpcsection132: ; O1-NEXT: lock andw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: 
retq @@ -3677,7 +3722,7 @@ define void @atomic16_and_release(ptr %a) { ; O2-LABEL: atomic16_and_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection117: +; O2-NEXT: .Lpcsection132: ; O2-NEXT: lock andw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3685,7 +3730,7 @@ define void @atomic16_and_release(ptr %a) { ; O3-LABEL: atomic16_and_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection117: +; O3-NEXT: .Lpcsection132: ; O3-NEXT: lock andw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3708,7 +3753,7 @@ define void @atomic16_or_release(ptr %a) { ; O1-LABEL: atomic16_or_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection118: +; O1-NEXT: .Lpcsection133: ; O1-NEXT: lock orw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3716,7 +3761,7 @@ define void @atomic16_or_release(ptr %a) { ; O2-LABEL: atomic16_or_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection118: +; O2-NEXT: .Lpcsection133: ; O2-NEXT: lock orw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3724,7 +3769,7 @@ define void @atomic16_or_release(ptr %a) { ; O3-LABEL: atomic16_or_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection118: +; O3-NEXT: .Lpcsection133: ; O3-NEXT: lock orw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3747,7 +3792,7 @@ define void @atomic16_xor_release(ptr %a) { ; O1-LABEL: atomic16_xor_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection119: +; O1-NEXT: .Lpcsection134: ; O1-NEXT: lock xorw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3755,7 +3800,7 @@ define void @atomic16_xor_release(ptr %a) { ; O2-LABEL: atomic16_xor_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection119: +; O2-NEXT: .Lpcsection134: ; O2-NEXT: lock xorw $42, (%rdi) ; O2-NEXT: 
movq $1, foo(%rip) ; O2-NEXT: retq @@ -3763,7 +3808,7 @@ define void @atomic16_xor_release(ptr %a) { ; O3-LABEL: atomic16_xor_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection119: +; O3-NEXT: .Lpcsection134: ; O3-NEXT: lock xorw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3812,23 +3857,23 @@ define void @atomic16_nand_release(ptr %a) { ; O1-LABEL: atomic16_nand_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection120: +; O1-NEXT: .Lpcsection135: ; O1-NEXT: movzwl (%rdi), %eax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB78_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection121: +; O1-NEXT: .Lpcsection136: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection122: +; O1-NEXT: .Lpcsection137: ; O1-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O1-NEXT: .Lpcsection123: +; O1-NEXT: .Lpcsection138: ; O1-NEXT: # kill: def $ax killed $ax killed $eax -; O1-NEXT: .Lpcsection124: +; O1-NEXT: .Lpcsection139: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) -; O1-NEXT: .Lpcsection125: +; O1-NEXT: .Lpcsection140: ; O1-NEXT: # kill: def $ax killed $ax def $eax -; O1-NEXT: .Lpcsection126: +; O1-NEXT: .Lpcsection141: ; O1-NEXT: jne .LBB78_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -3837,23 +3882,23 @@ define void @atomic16_nand_release(ptr %a) { ; O2-LABEL: atomic16_nand_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection120: +; O2-NEXT: .Lpcsection135: ; O2-NEXT: movzwl (%rdi), %eax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB78_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection121: +; O2-NEXT: .Lpcsection136: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection122: +; O2-NEXT: .Lpcsection137: ; O2-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O2-NEXT: .Lpcsection123: +; O2-NEXT: .Lpcsection138: ; O2-NEXT: # kill: def $ax killed $ax 
killed $eax -; O2-NEXT: .Lpcsection124: +; O2-NEXT: .Lpcsection139: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) -; O2-NEXT: .Lpcsection125: +; O2-NEXT: .Lpcsection140: ; O2-NEXT: # kill: def $ax killed $ax def $eax -; O2-NEXT: .Lpcsection126: +; O2-NEXT: .Lpcsection141: ; O2-NEXT: jne .LBB78_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -3862,23 +3907,23 @@ define void @atomic16_nand_release(ptr %a) { ; O3-LABEL: atomic16_nand_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection120: +; O3-NEXT: .Lpcsection135: ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB78_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection121: +; O3-NEXT: .Lpcsection136: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection122: +; O3-NEXT: .Lpcsection137: ; O3-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O3-NEXT: .Lpcsection123: +; O3-NEXT: .Lpcsection138: ; O3-NEXT: # kill: def $ax killed $ax killed $eax -; O3-NEXT: .Lpcsection124: +; O3-NEXT: .Lpcsection139: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) -; O3-NEXT: .Lpcsection125: +; O3-NEXT: .Lpcsection140: ; O3-NEXT: # kill: def $ax killed $ax def $eax -; O3-NEXT: .Lpcsection126: +; O3-NEXT: .Lpcsection141: ; O3-NEXT: jne .LBB78_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -3904,7 +3949,7 @@ define void @atomic16_xchg_acq_rel(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection127: +; O1-NEXT: .Lpcsection142: ; O1-NEXT: xchgw %ax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3913,7 +3958,7 @@ define void @atomic16_xchg_acq_rel(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection127: +; O2-NEXT: .Lpcsection142: ; O2-NEXT: xchgw %ax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3922,7 +3967,7 @@ define void @atomic16_xchg_acq_rel(ptr %a) { ; O3: 
# %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection127: +; O3-NEXT: .Lpcsection142: ; O3-NEXT: xchgw %ax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3945,7 +3990,7 @@ define void @atomic16_add_acq_rel(ptr %a) { ; O1-LABEL: atomic16_add_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection128: +; O1-NEXT: .Lpcsection143: ; O1-NEXT: lock addw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3953,7 +3998,7 @@ define void @atomic16_add_acq_rel(ptr %a) { ; O2-LABEL: atomic16_add_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection128: +; O2-NEXT: .Lpcsection143: ; O2-NEXT: lock addw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -3961,7 +4006,7 @@ define void @atomic16_add_acq_rel(ptr %a) { ; O3-LABEL: atomic16_add_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection128: +; O3-NEXT: .Lpcsection143: ; O3-NEXT: lock addw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -3984,7 +4029,7 @@ define void @atomic16_sub_acq_rel(ptr %a) { ; O1-LABEL: atomic16_sub_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection129: +; O1-NEXT: .Lpcsection144: ; O1-NEXT: lock subw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -3992,7 +4037,7 @@ define void @atomic16_sub_acq_rel(ptr %a) { ; O2-LABEL: atomic16_sub_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection129: +; O2-NEXT: .Lpcsection144: ; O2-NEXT: lock subw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4000,7 +4045,7 @@ define void @atomic16_sub_acq_rel(ptr %a) { ; O3-LABEL: atomic16_sub_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection129: +; O3-NEXT: .Lpcsection144: ; O3-NEXT: lock subw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4023,7 +4068,7 @@ define void 
@atomic16_and_acq_rel(ptr %a) { ; O1-LABEL: atomic16_and_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection130: +; O1-NEXT: .Lpcsection145: ; O1-NEXT: lock andw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4031,7 +4076,7 @@ define void @atomic16_and_acq_rel(ptr %a) { ; O2-LABEL: atomic16_and_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection130: +; O2-NEXT: .Lpcsection145: ; O2-NEXT: lock andw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4039,7 +4084,7 @@ define void @atomic16_and_acq_rel(ptr %a) { ; O3-LABEL: atomic16_and_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection130: +; O3-NEXT: .Lpcsection145: ; O3-NEXT: lock andw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4062,7 +4107,7 @@ define void @atomic16_or_acq_rel(ptr %a) { ; O1-LABEL: atomic16_or_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection131: +; O1-NEXT: .Lpcsection146: ; O1-NEXT: lock orw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4070,7 +4115,7 @@ define void @atomic16_or_acq_rel(ptr %a) { ; O2-LABEL: atomic16_or_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection131: +; O2-NEXT: .Lpcsection146: ; O2-NEXT: lock orw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4078,7 +4123,7 @@ define void @atomic16_or_acq_rel(ptr %a) { ; O3-LABEL: atomic16_or_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection131: +; O3-NEXT: .Lpcsection146: ; O3-NEXT: lock orw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4101,7 +4146,7 @@ define void @atomic16_xor_acq_rel(ptr %a) { ; O1-LABEL: atomic16_xor_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection132: +; O1-NEXT: .Lpcsection147: ; O1-NEXT: lock xorw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ 
-4109,7 +4154,7 @@ define void @atomic16_xor_acq_rel(ptr %a) { ; O2-LABEL: atomic16_xor_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection132: +; O2-NEXT: .Lpcsection147: ; O2-NEXT: lock xorw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4117,7 +4162,7 @@ define void @atomic16_xor_acq_rel(ptr %a) { ; O3-LABEL: atomic16_xor_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection132: +; O3-NEXT: .Lpcsection147: ; O3-NEXT: lock xorw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4166,23 +4211,23 @@ define void @atomic16_nand_acq_rel(ptr %a) { ; O1-LABEL: atomic16_nand_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection133: +; O1-NEXT: .Lpcsection148: ; O1-NEXT: movzwl (%rdi), %eax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB85_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection134: +; O1-NEXT: .Lpcsection149: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection135: +; O1-NEXT: .Lpcsection150: ; O1-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O1-NEXT: .Lpcsection136: +; O1-NEXT: .Lpcsection151: ; O1-NEXT: # kill: def $ax killed $ax killed $eax -; O1-NEXT: .Lpcsection137: +; O1-NEXT: .Lpcsection152: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) -; O1-NEXT: .Lpcsection138: +; O1-NEXT: .Lpcsection153: ; O1-NEXT: # kill: def $ax killed $ax def $eax -; O1-NEXT: .Lpcsection139: +; O1-NEXT: .Lpcsection154: ; O1-NEXT: jne .LBB85_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -4191,23 +4236,23 @@ define void @atomic16_nand_acq_rel(ptr %a) { ; O2-LABEL: atomic16_nand_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection133: +; O2-NEXT: .Lpcsection148: ; O2-NEXT: movzwl (%rdi), %eax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB85_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: 
.Lpcsection134: +; O2-NEXT: .Lpcsection149: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection135: +; O2-NEXT: .Lpcsection150: ; O2-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O2-NEXT: .Lpcsection136: +; O2-NEXT: .Lpcsection151: ; O2-NEXT: # kill: def $ax killed $ax killed $eax -; O2-NEXT: .Lpcsection137: +; O2-NEXT: .Lpcsection152: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) -; O2-NEXT: .Lpcsection138: +; O2-NEXT: .Lpcsection153: ; O2-NEXT: # kill: def $ax killed $ax def $eax -; O2-NEXT: .Lpcsection139: +; O2-NEXT: .Lpcsection154: ; O2-NEXT: jne .LBB85_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -4216,23 +4261,23 @@ define void @atomic16_nand_acq_rel(ptr %a) { ; O3-LABEL: atomic16_nand_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection133: +; O3-NEXT: .Lpcsection148: ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB85_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection134: +; O3-NEXT: .Lpcsection149: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection135: +; O3-NEXT: .Lpcsection150: ; O3-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O3-NEXT: .Lpcsection136: +; O3-NEXT: .Lpcsection151: ; O3-NEXT: # kill: def $ax killed $ax killed $eax -; O3-NEXT: .Lpcsection137: +; O3-NEXT: .Lpcsection152: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) -; O3-NEXT: .Lpcsection138: +; O3-NEXT: .Lpcsection153: ; O3-NEXT: # kill: def $ax killed $ax def $eax -; O3-NEXT: .Lpcsection139: +; O3-NEXT: .Lpcsection154: ; O3-NEXT: jne .LBB85_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -4258,7 +4303,7 @@ define void @atomic16_xchg_seq_cst(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection140: +; O1-NEXT: .Lpcsection155: ; O1-NEXT: xchgw %ax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4267,7 +4312,7 @@ define void @atomic16_xchg_seq_cst(ptr %a) { ; O2: # %bb.0: # %entry ; 
O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection140: +; O2-NEXT: .Lpcsection155: ; O2-NEXT: xchgw %ax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4276,7 +4321,7 @@ define void @atomic16_xchg_seq_cst(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection140: +; O3-NEXT: .Lpcsection155: ; O3-NEXT: xchgw %ax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4299,7 +4344,7 @@ define void @atomic16_add_seq_cst(ptr %a) { ; O1-LABEL: atomic16_add_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection141: +; O1-NEXT: .Lpcsection156: ; O1-NEXT: lock addw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4307,7 +4352,7 @@ define void @atomic16_add_seq_cst(ptr %a) { ; O2-LABEL: atomic16_add_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection141: +; O2-NEXT: .Lpcsection156: ; O2-NEXT: lock addw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4315,7 +4360,7 @@ define void @atomic16_add_seq_cst(ptr %a) { ; O3-LABEL: atomic16_add_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection141: +; O3-NEXT: .Lpcsection156: ; O3-NEXT: lock addw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4338,7 +4383,7 @@ define void @atomic16_sub_seq_cst(ptr %a) { ; O1-LABEL: atomic16_sub_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection142: +; O1-NEXT: .Lpcsection157: ; O1-NEXT: lock subw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4346,7 +4391,7 @@ define void @atomic16_sub_seq_cst(ptr %a) { ; O2-LABEL: atomic16_sub_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection142: +; O2-NEXT: .Lpcsection157: ; O2-NEXT: lock subw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4354,7 +4399,7 @@ define void @atomic16_sub_seq_cst(ptr %a) { ; O3-LABEL: 
atomic16_sub_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection142: +; O3-NEXT: .Lpcsection157: ; O3-NEXT: lock subw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4377,7 +4422,7 @@ define void @atomic16_and_seq_cst(ptr %a) { ; O1-LABEL: atomic16_and_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection143: +; O1-NEXT: .Lpcsection158: ; O1-NEXT: lock andw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4385,7 +4430,7 @@ define void @atomic16_and_seq_cst(ptr %a) { ; O2-LABEL: atomic16_and_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection143: +; O2-NEXT: .Lpcsection158: ; O2-NEXT: lock andw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4393,7 +4438,7 @@ define void @atomic16_and_seq_cst(ptr %a) { ; O3-LABEL: atomic16_and_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection143: +; O3-NEXT: .Lpcsection158: ; O3-NEXT: lock andw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4416,7 +4461,7 @@ define void @atomic16_or_seq_cst(ptr %a) { ; O1-LABEL: atomic16_or_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection144: +; O1-NEXT: .Lpcsection159: ; O1-NEXT: lock orw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4424,7 +4469,7 @@ define void @atomic16_or_seq_cst(ptr %a) { ; O2-LABEL: atomic16_or_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection144: +; O2-NEXT: .Lpcsection159: ; O2-NEXT: lock orw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4432,7 +4477,7 @@ define void @atomic16_or_seq_cst(ptr %a) { ; O3-LABEL: atomic16_or_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection144: +; O3-NEXT: .Lpcsection159: ; O3-NEXT: lock orw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4455,7 +4500,7 @@ define void 
@atomic16_xor_seq_cst(ptr %a) { ; O1-LABEL: atomic16_xor_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection145: +; O1-NEXT: .Lpcsection160: ; O1-NEXT: lock xorw $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4463,7 +4508,7 @@ define void @atomic16_xor_seq_cst(ptr %a) { ; O2-LABEL: atomic16_xor_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection145: +; O2-NEXT: .Lpcsection160: ; O2-NEXT: lock xorw $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4471,7 +4516,7 @@ define void @atomic16_xor_seq_cst(ptr %a) { ; O3-LABEL: atomic16_xor_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection145: +; O3-NEXT: .Lpcsection160: ; O3-NEXT: lock xorw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4520,23 +4565,23 @@ define void @atomic16_nand_seq_cst(ptr %a) { ; O1-LABEL: atomic16_nand_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection146: +; O1-NEXT: .Lpcsection161: ; O1-NEXT: movzwl (%rdi), %eax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB92_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection147: +; O1-NEXT: .Lpcsection162: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection148: +; O1-NEXT: .Lpcsection163: ; O1-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O1-NEXT: .Lpcsection149: +; O1-NEXT: .Lpcsection164: ; O1-NEXT: # kill: def $ax killed $ax killed $eax -; O1-NEXT: .Lpcsection150: +; O1-NEXT: .Lpcsection165: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) -; O1-NEXT: .Lpcsection151: +; O1-NEXT: .Lpcsection166: ; O1-NEXT: # kill: def $ax killed $ax def $eax -; O1-NEXT: .Lpcsection152: +; O1-NEXT: .Lpcsection167: ; O1-NEXT: jne .LBB92_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -4545,23 +4590,23 @@ define void @atomic16_nand_seq_cst(ptr %a) { ; O2-LABEL: atomic16_nand_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq 
foo(%rip), %rax -; O2-NEXT: .Lpcsection146: +; O2-NEXT: .Lpcsection161: ; O2-NEXT: movzwl (%rdi), %eax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB92_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection147: +; O2-NEXT: .Lpcsection162: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection148: +; O2-NEXT: .Lpcsection163: ; O2-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O2-NEXT: .Lpcsection149: +; O2-NEXT: .Lpcsection164: ; O2-NEXT: # kill: def $ax killed $ax killed $eax -; O2-NEXT: .Lpcsection150: +; O2-NEXT: .Lpcsection165: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) -; O2-NEXT: .Lpcsection151: +; O2-NEXT: .Lpcsection166: ; O2-NEXT: # kill: def $ax killed $ax def $eax -; O2-NEXT: .Lpcsection152: +; O2-NEXT: .Lpcsection167: ; O2-NEXT: jne .LBB92_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -4570,23 +4615,23 @@ define void @atomic16_nand_seq_cst(ptr %a) { ; O3-LABEL: atomic16_nand_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection146: +; O3-NEXT: .Lpcsection161: ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB92_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection147: +; O3-NEXT: .Lpcsection162: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection148: +; O3-NEXT: .Lpcsection163: ; O3-NEXT: orl $65493, %ecx # imm = 0xFFD5 -; O3-NEXT: .Lpcsection149: +; O3-NEXT: .Lpcsection164: ; O3-NEXT: # kill: def $ax killed $ax killed $eax -; O3-NEXT: .Lpcsection150: +; O3-NEXT: .Lpcsection165: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) -; O3-NEXT: .Lpcsection151: +; O3-NEXT: .Lpcsection166: ; O3-NEXT: # kill: def $ax killed $ax def $eax -; O3-NEXT: .Lpcsection152: +; O3-NEXT: .Lpcsection167: ; O3-NEXT: jne .LBB92_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -4625,13 +4670,13 @@ define void @atomic16_cas_monotonic(ptr %a) { ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: 
movw $1, %cx ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection153: +; O1-NEXT: .Lpcsection168: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection154: +; O1-NEXT: .Lpcsection169: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection155: +; O1-NEXT: .Lpcsection170: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4641,13 +4686,13 @@ define void @atomic16_cas_monotonic(ptr %a) { ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $1, %cx ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection153: +; O2-NEXT: .Lpcsection168: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection154: +; O2-NEXT: .Lpcsection169: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection155: +; O2-NEXT: .Lpcsection170: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4657,13 +4702,13 @@ define void @atomic16_cas_monotonic(ptr %a) { ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $1, %cx ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection153: +; O3-NEXT: .Lpcsection168: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection154: +; O3-NEXT: .Lpcsection169: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection155: +; O3-NEXT: .Lpcsection170: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4703,13 +4748,13 @@ define void @atomic16_cas_acquire(ptr %a) { ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movw $1, %cx ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection156: +; O1-NEXT: .Lpcsection171: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection157: +; O1-NEXT: .Lpcsection172: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection158: +; O1-NEXT: .Lpcsection173: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ 
-4719,13 +4764,13 @@ define void @atomic16_cas_acquire(ptr %a) { ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $1, %cx ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection156: +; O2-NEXT: .Lpcsection171: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection157: +; O2-NEXT: .Lpcsection172: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection158: +; O2-NEXT: .Lpcsection173: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4735,13 +4780,13 @@ define void @atomic16_cas_acquire(ptr %a) { ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $1, %cx ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection156: +; O3-NEXT: .Lpcsection171: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection157: +; O3-NEXT: .Lpcsection172: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection158: +; O3-NEXT: .Lpcsection173: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4781,13 +4826,13 @@ define void @atomic16_cas_release(ptr %a) { ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movw $1, %cx ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection159: +; O1-NEXT: .Lpcsection174: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection160: +; O1-NEXT: .Lpcsection175: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection161: +; O1-NEXT: .Lpcsection176: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4797,13 +4842,13 @@ define void @atomic16_cas_release(ptr %a) { ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $1, %cx ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection159: +; O2-NEXT: .Lpcsection174: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection160: +; O2-NEXT: .Lpcsection175: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection161: +; O2-NEXT: 
.Lpcsection176: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4813,13 +4858,13 @@ define void @atomic16_cas_release(ptr %a) { ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $1, %cx ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection159: +; O3-NEXT: .Lpcsection174: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection160: +; O3-NEXT: .Lpcsection175: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection161: +; O3-NEXT: .Lpcsection176: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4859,13 +4904,13 @@ define void @atomic16_cas_acq_rel(ptr %a) { ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movw $1, %cx ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection162: +; O1-NEXT: .Lpcsection177: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection163: +; O1-NEXT: .Lpcsection178: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection164: +; O1-NEXT: .Lpcsection179: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4875,13 +4920,13 @@ define void @atomic16_cas_acq_rel(ptr %a) { ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $1, %cx ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection162: +; O2-NEXT: .Lpcsection177: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection163: +; O2-NEXT: .Lpcsection178: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection164: +; O2-NEXT: .Lpcsection179: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4891,13 +4936,13 @@ define void @atomic16_cas_acq_rel(ptr %a) { ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $1, %cx ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection162: +; O3-NEXT: .Lpcsection177: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection163: +; O3-NEXT: .Lpcsection178: ; 
O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection164: +; O3-NEXT: .Lpcsection179: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -4937,13 +4982,13 @@ define void @atomic16_cas_seq_cst(ptr %a) { ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movw $1, %cx ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection165: +; O1-NEXT: .Lpcsection180: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection166: +; O1-NEXT: .Lpcsection181: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movw $42, %ax -; O1-NEXT: .Lpcsection167: +; O1-NEXT: .Lpcsection182: ; O1-NEXT: lock cmpxchgw %cx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -4953,13 +4998,13 @@ define void @atomic16_cas_seq_cst(ptr %a) { ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movw $1, %cx ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection165: +; O2-NEXT: .Lpcsection180: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection166: +; O2-NEXT: .Lpcsection181: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movw $42, %ax -; O2-NEXT: .Lpcsection167: +; O2-NEXT: .Lpcsection182: ; O2-NEXT: lock cmpxchgw %cx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -4969,13 +5014,13 @@ define void @atomic16_cas_seq_cst(ptr %a) { ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movw $1, %cx ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection165: +; O3-NEXT: .Lpcsection180: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection166: +; O3-NEXT: .Lpcsection181: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movw $42, %ax -; O3-NEXT: .Lpcsection167: +; O3-NEXT: .Lpcsection182: ; O3-NEXT: lock cmpxchgw %cx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5000,7 +5045,7 @@ define i32 @atomic32_load_unordered(ptr %a) { ; O1-LABEL: atomic32_load_unordered: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection168: +; O1-NEXT: .Lpcsection183: ; O1-NEXT: 
movl (%rdi), %eax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5008,7 +5053,7 @@ define i32 @atomic32_load_unordered(ptr %a) { ; O2-LABEL: atomic32_load_unordered: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection168: +; O2-NEXT: .Lpcsection183: ; O2-NEXT: movl (%rdi), %eax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5016,7 +5061,7 @@ define i32 @atomic32_load_unordered(ptr %a) { ; O3-LABEL: atomic32_load_unordered: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection168: +; O3-NEXT: .Lpcsection183: ; O3-NEXT: movl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5039,7 +5084,7 @@ define i32 @atomic32_load_monotonic(ptr %a) { ; O1-LABEL: atomic32_load_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection169: +; O1-NEXT: .Lpcsection184: ; O1-NEXT: movl (%rdi), %eax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5047,7 +5092,7 @@ define i32 @atomic32_load_monotonic(ptr %a) { ; O2-LABEL: atomic32_load_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection169: +; O2-NEXT: .Lpcsection184: ; O2-NEXT: movl (%rdi), %eax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5055,7 +5100,7 @@ define i32 @atomic32_load_monotonic(ptr %a) { ; O3-LABEL: atomic32_load_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection169: +; O3-NEXT: .Lpcsection184: ; O3-NEXT: movl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5078,7 +5123,7 @@ define i32 @atomic32_load_acquire(ptr %a) { ; O1-LABEL: atomic32_load_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection170: +; O1-NEXT: .Lpcsection185: ; O1-NEXT: movl (%rdi), %eax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5086,7 +5131,7 @@ define i32 @atomic32_load_acquire(ptr %a) { ; O2-LABEL: atomic32_load_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection170: +; 
O2-NEXT: .Lpcsection185: ; O2-NEXT: movl (%rdi), %eax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5094,7 +5139,7 @@ define i32 @atomic32_load_acquire(ptr %a) { ; O3-LABEL: atomic32_load_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection170: +; O3-NEXT: .Lpcsection185: ; O3-NEXT: movl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5117,7 +5162,7 @@ define i32 @atomic32_load_seq_cst(ptr %a) { ; O1-LABEL: atomic32_load_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection171: +; O1-NEXT: .Lpcsection186: ; O1-NEXT: movl (%rdi), %eax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5125,7 +5170,7 @@ define i32 @atomic32_load_seq_cst(ptr %a) { ; O2-LABEL: atomic32_load_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection171: +; O2-NEXT: .Lpcsection186: ; O2-NEXT: movl (%rdi), %eax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5133,7 +5178,7 @@ define i32 @atomic32_load_seq_cst(ptr %a) { ; O3-LABEL: atomic32_load_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection171: +; O3-NEXT: .Lpcsection186: ; O3-NEXT: movl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5156,7 +5201,7 @@ define void @atomic32_store_unordered(ptr %a) { ; O1-LABEL: atomic32_store_unordered: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection172: +; O1-NEXT: .Lpcsection187: ; O1-NEXT: movl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5164,7 +5209,7 @@ define void @atomic32_store_unordered(ptr %a) { ; O2-LABEL: atomic32_store_unordered: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection172: +; O2-NEXT: .Lpcsection187: ; O2-NEXT: movl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5172,7 +5217,7 @@ define void @atomic32_store_unordered(ptr %a) { ; O3-LABEL: atomic32_store_unordered: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), 
%rax -; O3-NEXT: .Lpcsection172: +; O3-NEXT: .Lpcsection187: ; O3-NEXT: movl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5195,7 +5240,7 @@ define void @atomic32_store_monotonic(ptr %a) { ; O1-LABEL: atomic32_store_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection173: +; O1-NEXT: .Lpcsection188: ; O1-NEXT: movl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5203,7 +5248,7 @@ define void @atomic32_store_monotonic(ptr %a) { ; O2-LABEL: atomic32_store_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection173: +; O2-NEXT: .Lpcsection188: ; O2-NEXT: movl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5211,7 +5256,7 @@ define void @atomic32_store_monotonic(ptr %a) { ; O3-LABEL: atomic32_store_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection173: +; O3-NEXT: .Lpcsection188: ; O3-NEXT: movl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5234,7 +5279,7 @@ define void @atomic32_store_release(ptr %a) { ; O1-LABEL: atomic32_store_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection174: +; O1-NEXT: .Lpcsection189: ; O1-NEXT: movl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5242,7 +5287,7 @@ define void @atomic32_store_release(ptr %a) { ; O2-LABEL: atomic32_store_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection174: +; O2-NEXT: .Lpcsection189: ; O2-NEXT: movl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5250,7 +5295,7 @@ define void @atomic32_store_release(ptr %a) { ; O3-LABEL: atomic32_store_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection174: +; O3-NEXT: .Lpcsection189: ; O3-NEXT: movl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5275,7 +5320,7 @@ define void @atomic32_store_seq_cst(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq 
foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection175: +; O1-NEXT: .Lpcsection190: ; O1-NEXT: xchgl %eax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5284,7 +5329,7 @@ define void @atomic32_store_seq_cst(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection175: +; O2-NEXT: .Lpcsection190: ; O2-NEXT: xchgl %eax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5293,7 +5338,7 @@ define void @atomic32_store_seq_cst(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection175: +; O3-NEXT: .Lpcsection190: ; O3-NEXT: xchgl %eax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5318,7 +5363,7 @@ define void @atomic32_xchg_monotonic(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection176: +; O1-NEXT: .Lpcsection191: ; O1-NEXT: xchgl %eax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5327,7 +5372,7 @@ define void @atomic32_xchg_monotonic(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection176: +; O2-NEXT: .Lpcsection191: ; O2-NEXT: xchgl %eax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5336,7 +5381,7 @@ define void @atomic32_xchg_monotonic(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection176: +; O3-NEXT: .Lpcsection191: ; O3-NEXT: xchgl %eax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5359,7 +5404,7 @@ define void @atomic32_add_monotonic(ptr %a) { ; O1-LABEL: atomic32_add_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection177: +; O1-NEXT: .Lpcsection192: ; O1-NEXT: lock addl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5367,7 +5412,7 @@ define void @atomic32_add_monotonic(ptr %a) { ; O2-LABEL: atomic32_add_monotonic: ; O2: # 
%bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection177: +; O2-NEXT: .Lpcsection192: ; O2-NEXT: lock addl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5375,7 +5420,7 @@ define void @atomic32_add_monotonic(ptr %a) { ; O3-LABEL: atomic32_add_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection177: +; O3-NEXT: .Lpcsection192: ; O3-NEXT: lock addl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5398,7 +5443,7 @@ define void @atomic32_sub_monotonic(ptr %a) { ; O1-LABEL: atomic32_sub_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection178: +; O1-NEXT: .Lpcsection193: ; O1-NEXT: lock subl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5406,7 +5451,7 @@ define void @atomic32_sub_monotonic(ptr %a) { ; O2-LABEL: atomic32_sub_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection178: +; O2-NEXT: .Lpcsection193: ; O2-NEXT: lock subl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5414,7 +5459,7 @@ define void @atomic32_sub_monotonic(ptr %a) { ; O3-LABEL: atomic32_sub_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection178: +; O3-NEXT: .Lpcsection193: ; O3-NEXT: lock subl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5437,7 +5482,7 @@ define void @atomic32_and_monotonic(ptr %a) { ; O1-LABEL: atomic32_and_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection179: +; O1-NEXT: .Lpcsection194: ; O1-NEXT: lock andl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5445,7 +5490,7 @@ define void @atomic32_and_monotonic(ptr %a) { ; O2-LABEL: atomic32_and_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection179: +; O2-NEXT: .Lpcsection194: ; O2-NEXT: lock andl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5453,7 +5498,7 @@ define void 
@atomic32_and_monotonic(ptr %a) { ; O3-LABEL: atomic32_and_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection179: +; O3-NEXT: .Lpcsection194: ; O3-NEXT: lock andl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5476,7 +5521,7 @@ define void @atomic32_or_monotonic(ptr %a) { ; O1-LABEL: atomic32_or_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection180: +; O1-NEXT: .Lpcsection195: ; O1-NEXT: lock orl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5484,7 +5529,7 @@ define void @atomic32_or_monotonic(ptr %a) { ; O2-LABEL: atomic32_or_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection180: +; O2-NEXT: .Lpcsection195: ; O2-NEXT: lock orl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5492,7 +5537,7 @@ define void @atomic32_or_monotonic(ptr %a) { ; O3-LABEL: atomic32_or_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection180: +; O3-NEXT: .Lpcsection195: ; O3-NEXT: lock orl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5515,7 +5560,7 @@ define void @atomic32_xor_monotonic(ptr %a) { ; O1-LABEL: atomic32_xor_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection181: +; O1-NEXT: .Lpcsection196: ; O1-NEXT: lock xorl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5523,7 +5568,7 @@ define void @atomic32_xor_monotonic(ptr %a) { ; O2-LABEL: atomic32_xor_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection181: +; O2-NEXT: .Lpcsection196: ; O2-NEXT: lock xorl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5531,7 +5576,7 @@ define void @atomic32_xor_monotonic(ptr %a) { ; O3-LABEL: atomic32_xor_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection181: +; O3-NEXT: .Lpcsection196: ; O3-NEXT: lock xorl $42, (%rdi) ; O3-NEXT: movq $1, 
foo(%rip) ; O3-NEXT: retq @@ -5576,19 +5621,19 @@ define void @atomic32_nand_monotonic(ptr %a) { ; O1-LABEL: atomic32_nand_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection182: +; O1-NEXT: .Lpcsection197: ; O1-NEXT: movl (%rdi), %eax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB112_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection183: +; O1-NEXT: .Lpcsection198: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection184: +; O1-NEXT: .Lpcsection199: ; O1-NEXT: orl $-43, %ecx -; O1-NEXT: .Lpcsection185: +; O1-NEXT: .Lpcsection200: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) -; O1-NEXT: .Lpcsection186: +; O1-NEXT: .Lpcsection201: ; O1-NEXT: jne .LBB112_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -5597,19 +5642,19 @@ define void @atomic32_nand_monotonic(ptr %a) { ; O2-LABEL: atomic32_nand_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection182: +; O2-NEXT: .Lpcsection197: ; O2-NEXT: movl (%rdi), %eax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB112_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection183: +; O2-NEXT: .Lpcsection198: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection184: +; O2-NEXT: .Lpcsection199: ; O2-NEXT: orl $-43, %ecx -; O2-NEXT: .Lpcsection185: +; O2-NEXT: .Lpcsection200: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) -; O2-NEXT: .Lpcsection186: +; O2-NEXT: .Lpcsection201: ; O2-NEXT: jne .LBB112_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -5618,19 +5663,19 @@ define void @atomic32_nand_monotonic(ptr %a) { ; O3-LABEL: atomic32_nand_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection182: +; O3-NEXT: .Lpcsection197: ; O3-NEXT: movl (%rdi), %eax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB112_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, 
%ecx -; O3-NEXT: .Lpcsection183: +; O3-NEXT: .Lpcsection198: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection184: +; O3-NEXT: .Lpcsection199: ; O3-NEXT: orl $-43, %ecx -; O3-NEXT: .Lpcsection185: +; O3-NEXT: .Lpcsection200: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) -; O3-NEXT: .Lpcsection186: +; O3-NEXT: .Lpcsection201: ; O3-NEXT: jne .LBB112_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -5656,7 +5701,7 @@ define void @atomic32_xchg_acquire(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection187: +; O1-NEXT: .Lpcsection202: ; O1-NEXT: xchgl %eax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5665,7 +5710,7 @@ define void @atomic32_xchg_acquire(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection187: +; O2-NEXT: .Lpcsection202: ; O2-NEXT: xchgl %eax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5674,7 +5719,7 @@ define void @atomic32_xchg_acquire(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection187: +; O3-NEXT: .Lpcsection202: ; O3-NEXT: xchgl %eax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5697,7 +5742,7 @@ define void @atomic32_add_acquire(ptr %a) { ; O1-LABEL: atomic32_add_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection188: +; O1-NEXT: .Lpcsection203: ; O1-NEXT: lock addl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5705,7 +5750,7 @@ define void @atomic32_add_acquire(ptr %a) { ; O2-LABEL: atomic32_add_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection188: +; O2-NEXT: .Lpcsection203: ; O2-NEXT: lock addl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5713,7 +5758,7 @@ define void @atomic32_add_acquire(ptr %a) { ; O3-LABEL: atomic32_add_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: 
.Lpcsection188: +; O3-NEXT: .Lpcsection203: ; O3-NEXT: lock addl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5736,7 +5781,7 @@ define void @atomic32_sub_acquire(ptr %a) { ; O1-LABEL: atomic32_sub_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection189: +; O1-NEXT: .Lpcsection204: ; O1-NEXT: lock subl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5744,7 +5789,7 @@ define void @atomic32_sub_acquire(ptr %a) { ; O2-LABEL: atomic32_sub_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection189: +; O2-NEXT: .Lpcsection204: ; O2-NEXT: lock subl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5752,7 +5797,7 @@ define void @atomic32_sub_acquire(ptr %a) { ; O3-LABEL: atomic32_sub_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection189: +; O3-NEXT: .Lpcsection204: ; O3-NEXT: lock subl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5775,7 +5820,7 @@ define void @atomic32_and_acquire(ptr %a) { ; O1-LABEL: atomic32_and_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection190: +; O1-NEXT: .Lpcsection205: ; O1-NEXT: lock andl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5783,7 +5828,7 @@ define void @atomic32_and_acquire(ptr %a) { ; O2-LABEL: atomic32_and_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection190: +; O2-NEXT: .Lpcsection205: ; O2-NEXT: lock andl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5791,7 +5836,7 @@ define void @atomic32_and_acquire(ptr %a) { ; O3-LABEL: atomic32_and_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection190: +; O3-NEXT: .Lpcsection205: ; O3-NEXT: lock andl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5814,7 +5859,7 @@ define void @atomic32_or_acquire(ptr %a) { ; O1-LABEL: atomic32_or_acquire: ; O1: # %bb.0: # %entry ; 
O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection191: +; O1-NEXT: .Lpcsection206: ; O1-NEXT: lock orl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5822,7 +5867,7 @@ define void @atomic32_or_acquire(ptr %a) { ; O2-LABEL: atomic32_or_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection191: +; O2-NEXT: .Lpcsection206: ; O2-NEXT: lock orl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5830,7 +5875,7 @@ define void @atomic32_or_acquire(ptr %a) { ; O3-LABEL: atomic32_or_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection191: +; O3-NEXT: .Lpcsection206: ; O3-NEXT: lock orl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5853,7 +5898,7 @@ define void @atomic32_xor_acquire(ptr %a) { ; O1-LABEL: atomic32_xor_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection192: +; O1-NEXT: .Lpcsection207: ; O1-NEXT: lock xorl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -5861,7 +5906,7 @@ define void @atomic32_xor_acquire(ptr %a) { ; O2-LABEL: atomic32_xor_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection192: +; O2-NEXT: .Lpcsection207: ; O2-NEXT: lock xorl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -5869,7 +5914,7 @@ define void @atomic32_xor_acquire(ptr %a) { ; O3-LABEL: atomic32_xor_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection192: +; O3-NEXT: .Lpcsection207: ; O3-NEXT: lock xorl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -5914,19 +5959,19 @@ define void @atomic32_nand_acquire(ptr %a) { ; O1-LABEL: atomic32_nand_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection193: +; O1-NEXT: .Lpcsection208: ; O1-NEXT: movl (%rdi), %eax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB119_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, 
%ecx -; O1-NEXT: .Lpcsection194: +; O1-NEXT: .Lpcsection209: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection195: +; O1-NEXT: .Lpcsection210: ; O1-NEXT: orl $-43, %ecx -; O1-NEXT: .Lpcsection196: +; O1-NEXT: .Lpcsection211: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) -; O1-NEXT: .Lpcsection197: +; O1-NEXT: .Lpcsection212: ; O1-NEXT: jne .LBB119_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -5935,19 +5980,19 @@ define void @atomic32_nand_acquire(ptr %a) { ; O2-LABEL: atomic32_nand_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection193: +; O2-NEXT: .Lpcsection208: ; O2-NEXT: movl (%rdi), %eax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB119_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection194: +; O2-NEXT: .Lpcsection209: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection195: +; O2-NEXT: .Lpcsection210: ; O2-NEXT: orl $-43, %ecx -; O2-NEXT: .Lpcsection196: +; O2-NEXT: .Lpcsection211: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) -; O2-NEXT: .Lpcsection197: +; O2-NEXT: .Lpcsection212: ; O2-NEXT: jne .LBB119_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -5956,19 +6001,19 @@ define void @atomic32_nand_acquire(ptr %a) { ; O3-LABEL: atomic32_nand_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection193: +; O3-NEXT: .Lpcsection208: ; O3-NEXT: movl (%rdi), %eax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB119_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection194: +; O3-NEXT: .Lpcsection209: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection195: +; O3-NEXT: .Lpcsection210: ; O3-NEXT: orl $-43, %ecx -; O3-NEXT: .Lpcsection196: +; O3-NEXT: .Lpcsection211: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) -; O3-NEXT: .Lpcsection197: +; O3-NEXT: .Lpcsection212: ; O3-NEXT: jne .LBB119_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -5994,7 
+6039,7 @@ define void @atomic32_xchg_release(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection198: +; O1-NEXT: .Lpcsection213: ; O1-NEXT: xchgl %eax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6003,7 +6048,7 @@ define void @atomic32_xchg_release(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection198: +; O2-NEXT: .Lpcsection213: ; O2-NEXT: xchgl %eax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6012,7 +6057,7 @@ define void @atomic32_xchg_release(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection198: +; O3-NEXT: .Lpcsection213: ; O3-NEXT: xchgl %eax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6035,7 +6080,7 @@ define void @atomic32_add_release(ptr %a) { ; O1-LABEL: atomic32_add_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection199: +; O1-NEXT: .Lpcsection214: ; O1-NEXT: lock addl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6043,7 +6088,7 @@ define void @atomic32_add_release(ptr %a) { ; O2-LABEL: atomic32_add_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection199: +; O2-NEXT: .Lpcsection214: ; O2-NEXT: lock addl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6051,7 +6096,7 @@ define void @atomic32_add_release(ptr %a) { ; O3-LABEL: atomic32_add_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection199: +; O3-NEXT: .Lpcsection214: ; O3-NEXT: lock addl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6074,7 +6119,7 @@ define void @atomic32_sub_release(ptr %a) { ; O1-LABEL: atomic32_sub_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection200: +; O1-NEXT: .Lpcsection215: ; O1-NEXT: lock subl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ 
-6082,7 +6127,7 @@ define void @atomic32_sub_release(ptr %a) { ; O2-LABEL: atomic32_sub_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection200: +; O2-NEXT: .Lpcsection215: ; O2-NEXT: lock subl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6090,7 +6135,7 @@ define void @atomic32_sub_release(ptr %a) { ; O3-LABEL: atomic32_sub_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection200: +; O3-NEXT: .Lpcsection215: ; O3-NEXT: lock subl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6113,7 +6158,7 @@ define void @atomic32_and_release(ptr %a) { ; O1-LABEL: atomic32_and_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection201: +; O1-NEXT: .Lpcsection216: ; O1-NEXT: lock andl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6121,7 +6166,7 @@ define void @atomic32_and_release(ptr %a) { ; O2-LABEL: atomic32_and_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection201: +; O2-NEXT: .Lpcsection216: ; O2-NEXT: lock andl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6129,7 +6174,7 @@ define void @atomic32_and_release(ptr %a) { ; O3-LABEL: atomic32_and_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection201: +; O3-NEXT: .Lpcsection216: ; O3-NEXT: lock andl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6152,7 +6197,7 @@ define void @atomic32_or_release(ptr %a) { ; O1-LABEL: atomic32_or_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection202: +; O1-NEXT: .Lpcsection217: ; O1-NEXT: lock orl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6160,7 +6205,7 @@ define void @atomic32_or_release(ptr %a) { ; O2-LABEL: atomic32_or_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection202: +; O2-NEXT: .Lpcsection217: ; O2-NEXT: lock orl $42, (%rdi) ; O2-NEXT: movq 
$1, foo(%rip) ; O2-NEXT: retq @@ -6168,7 +6213,7 @@ define void @atomic32_or_release(ptr %a) { ; O3-LABEL: atomic32_or_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection202: +; O3-NEXT: .Lpcsection217: ; O3-NEXT: lock orl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6191,7 +6236,7 @@ define void @atomic32_xor_release(ptr %a) { ; O1-LABEL: atomic32_xor_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection203: +; O1-NEXT: .Lpcsection218: ; O1-NEXT: lock xorl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6199,7 +6244,7 @@ define void @atomic32_xor_release(ptr %a) { ; O2-LABEL: atomic32_xor_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection203: +; O2-NEXT: .Lpcsection218: ; O2-NEXT: lock xorl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6207,7 +6252,7 @@ define void @atomic32_xor_release(ptr %a) { ; O3-LABEL: atomic32_xor_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection203: +; O3-NEXT: .Lpcsection218: ; O3-NEXT: lock xorl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6252,19 +6297,19 @@ define void @atomic32_nand_release(ptr %a) { ; O1-LABEL: atomic32_nand_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection204: +; O1-NEXT: .Lpcsection219: ; O1-NEXT: movl (%rdi), %eax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB126_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection205: +; O1-NEXT: .Lpcsection220: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection206: +; O1-NEXT: .Lpcsection221: ; O1-NEXT: orl $-43, %ecx -; O1-NEXT: .Lpcsection207: +; O1-NEXT: .Lpcsection222: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) -; O1-NEXT: .Lpcsection208: +; O1-NEXT: .Lpcsection223: ; O1-NEXT: jne .LBB126_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -6273,19 
+6318,19 @@ define void @atomic32_nand_release(ptr %a) { ; O2-LABEL: atomic32_nand_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection204: +; O2-NEXT: .Lpcsection219: ; O2-NEXT: movl (%rdi), %eax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB126_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection205: +; O2-NEXT: .Lpcsection220: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection206: +; O2-NEXT: .Lpcsection221: ; O2-NEXT: orl $-43, %ecx -; O2-NEXT: .Lpcsection207: +; O2-NEXT: .Lpcsection222: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) -; O2-NEXT: .Lpcsection208: +; O2-NEXT: .Lpcsection223: ; O2-NEXT: jne .LBB126_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -6294,19 +6339,19 @@ define void @atomic32_nand_release(ptr %a) { ; O3-LABEL: atomic32_nand_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection204: +; O3-NEXT: .Lpcsection219: ; O3-NEXT: movl (%rdi), %eax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB126_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection205: +; O3-NEXT: .Lpcsection220: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection206: +; O3-NEXT: .Lpcsection221: ; O3-NEXT: orl $-43, %ecx -; O3-NEXT: .Lpcsection207: +; O3-NEXT: .Lpcsection222: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) -; O3-NEXT: .Lpcsection208: +; O3-NEXT: .Lpcsection223: ; O3-NEXT: jne .LBB126_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -6332,7 +6377,7 @@ define void @atomic32_xchg_acq_rel(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection209: +; O1-NEXT: .Lpcsection224: ; O1-NEXT: xchgl %eax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6341,7 +6386,7 @@ define void @atomic32_xchg_acq_rel(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; 
O2-NEXT: .Lpcsection209: +; O2-NEXT: .Lpcsection224: ; O2-NEXT: xchgl %eax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6350,7 +6395,7 @@ define void @atomic32_xchg_acq_rel(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection209: +; O3-NEXT: .Lpcsection224: ; O3-NEXT: xchgl %eax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6373,7 +6418,7 @@ define void @atomic32_add_acq_rel(ptr %a) { ; O1-LABEL: atomic32_add_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection210: +; O1-NEXT: .Lpcsection225: ; O1-NEXT: lock addl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6381,7 +6426,7 @@ define void @atomic32_add_acq_rel(ptr %a) { ; O2-LABEL: atomic32_add_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection210: +; O2-NEXT: .Lpcsection225: ; O2-NEXT: lock addl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6389,7 +6434,7 @@ define void @atomic32_add_acq_rel(ptr %a) { ; O3-LABEL: atomic32_add_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection210: +; O3-NEXT: .Lpcsection225: ; O3-NEXT: lock addl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6412,7 +6457,7 @@ define void @atomic32_sub_acq_rel(ptr %a) { ; O1-LABEL: atomic32_sub_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection211: +; O1-NEXT: .Lpcsection226: ; O1-NEXT: lock subl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6420,7 +6465,7 @@ define void @atomic32_sub_acq_rel(ptr %a) { ; O2-LABEL: atomic32_sub_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection211: +; O2-NEXT: .Lpcsection226: ; O2-NEXT: lock subl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6428,7 +6473,7 @@ define void @atomic32_sub_acq_rel(ptr %a) { ; O3-LABEL: atomic32_sub_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: 
movq foo(%rip), %rax -; O3-NEXT: .Lpcsection211: +; O3-NEXT: .Lpcsection226: ; O3-NEXT: lock subl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6451,7 +6496,7 @@ define void @atomic32_and_acq_rel(ptr %a) { ; O1-LABEL: atomic32_and_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection212: +; O1-NEXT: .Lpcsection227: ; O1-NEXT: lock andl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6459,7 +6504,7 @@ define void @atomic32_and_acq_rel(ptr %a) { ; O2-LABEL: atomic32_and_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection212: +; O2-NEXT: .Lpcsection227: ; O2-NEXT: lock andl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6467,7 +6512,7 @@ define void @atomic32_and_acq_rel(ptr %a) { ; O3-LABEL: atomic32_and_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection212: +; O3-NEXT: .Lpcsection227: ; O3-NEXT: lock andl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6490,7 +6535,7 @@ define void @atomic32_or_acq_rel(ptr %a) { ; O1-LABEL: atomic32_or_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection213: +; O1-NEXT: .Lpcsection228: ; O1-NEXT: lock orl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6498,7 +6543,7 @@ define void @atomic32_or_acq_rel(ptr %a) { ; O2-LABEL: atomic32_or_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection213: +; O2-NEXT: .Lpcsection228: ; O2-NEXT: lock orl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6506,7 +6551,7 @@ define void @atomic32_or_acq_rel(ptr %a) { ; O3-LABEL: atomic32_or_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection213: +; O3-NEXT: .Lpcsection228: ; O3-NEXT: lock orl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6529,7 +6574,7 @@ define void @atomic32_xor_acq_rel(ptr %a) { ; O1-LABEL: atomic32_xor_acq_rel: ; O1: 
# %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection214: +; O1-NEXT: .Lpcsection229: ; O1-NEXT: lock xorl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6537,7 +6582,7 @@ define void @atomic32_xor_acq_rel(ptr %a) { ; O2-LABEL: atomic32_xor_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection214: +; O2-NEXT: .Lpcsection229: ; O2-NEXT: lock xorl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6545,7 +6590,7 @@ define void @atomic32_xor_acq_rel(ptr %a) { ; O3-LABEL: atomic32_xor_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection214: +; O3-NEXT: .Lpcsection229: ; O3-NEXT: lock xorl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6590,19 +6635,19 @@ define void @atomic32_nand_acq_rel(ptr %a) { ; O1-LABEL: atomic32_nand_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection215: +; O1-NEXT: .Lpcsection230: ; O1-NEXT: movl (%rdi), %eax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB133_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection216: +; O1-NEXT: .Lpcsection231: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection217: +; O1-NEXT: .Lpcsection232: ; O1-NEXT: orl $-43, %ecx -; O1-NEXT: .Lpcsection218: +; O1-NEXT: .Lpcsection233: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) -; O1-NEXT: .Lpcsection219: +; O1-NEXT: .Lpcsection234: ; O1-NEXT: jne .LBB133_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -6611,19 +6656,19 @@ define void @atomic32_nand_acq_rel(ptr %a) { ; O2-LABEL: atomic32_nand_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection215: +; O2-NEXT: .Lpcsection230: ; O2-NEXT: movl (%rdi), %eax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB133_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection216: +; O2-NEXT: .Lpcsection231: 
; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection217: +; O2-NEXT: .Lpcsection232: ; O2-NEXT: orl $-43, %ecx -; O2-NEXT: .Lpcsection218: +; O2-NEXT: .Lpcsection233: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) -; O2-NEXT: .Lpcsection219: +; O2-NEXT: .Lpcsection234: ; O2-NEXT: jne .LBB133_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -6632,19 +6677,19 @@ define void @atomic32_nand_acq_rel(ptr %a) { ; O3-LABEL: atomic32_nand_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection215: +; O3-NEXT: .Lpcsection230: ; O3-NEXT: movl (%rdi), %eax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB133_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection216: +; O3-NEXT: .Lpcsection231: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection217: +; O3-NEXT: .Lpcsection232: ; O3-NEXT: orl $-43, %ecx -; O3-NEXT: .Lpcsection218: +; O3-NEXT: .Lpcsection233: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) -; O3-NEXT: .Lpcsection219: +; O3-NEXT: .Lpcsection234: ; O3-NEXT: jne .LBB133_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -6670,7 +6715,7 @@ define void @atomic32_xchg_seq_cst(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection220: +; O1-NEXT: .Lpcsection235: ; O1-NEXT: xchgl %eax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6679,7 +6724,7 @@ define void @atomic32_xchg_seq_cst(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection220: +; O2-NEXT: .Lpcsection235: ; O2-NEXT: xchgl %eax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6688,7 +6733,7 @@ define void @atomic32_xchg_seq_cst(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection220: +; O3-NEXT: .Lpcsection235: ; O3-NEXT: xchgl %eax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6711,7 +6756,7 @@ 
define void @atomic32_add_seq_cst(ptr %a) { ; O1-LABEL: atomic32_add_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection221: +; O1-NEXT: .Lpcsection236: ; O1-NEXT: lock addl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6719,7 +6764,7 @@ define void @atomic32_add_seq_cst(ptr %a) { ; O2-LABEL: atomic32_add_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection221: +; O2-NEXT: .Lpcsection236: ; O2-NEXT: lock addl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6727,7 +6772,7 @@ define void @atomic32_add_seq_cst(ptr %a) { ; O3-LABEL: atomic32_add_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection221: +; O3-NEXT: .Lpcsection236: ; O3-NEXT: lock addl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6750,7 +6795,7 @@ define void @atomic32_sub_seq_cst(ptr %a) { ; O1-LABEL: atomic32_sub_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection222: +; O1-NEXT: .Lpcsection237: ; O1-NEXT: lock subl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6758,7 +6803,7 @@ define void @atomic32_sub_seq_cst(ptr %a) { ; O2-LABEL: atomic32_sub_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection222: +; O2-NEXT: .Lpcsection237: ; O2-NEXT: lock subl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6766,7 +6811,7 @@ define void @atomic32_sub_seq_cst(ptr %a) { ; O3-LABEL: atomic32_sub_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection222: +; O3-NEXT: .Lpcsection237: ; O3-NEXT: lock subl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6789,7 +6834,7 @@ define void @atomic32_and_seq_cst(ptr %a) { ; O1-LABEL: atomic32_and_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection223: +; O1-NEXT: .Lpcsection238: ; O1-NEXT: lock andl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) 
; O1-NEXT: retq @@ -6797,7 +6842,7 @@ define void @atomic32_and_seq_cst(ptr %a) { ; O2-LABEL: atomic32_and_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection223: +; O2-NEXT: .Lpcsection238: ; O2-NEXT: lock andl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6805,7 +6850,7 @@ define void @atomic32_and_seq_cst(ptr %a) { ; O3-LABEL: atomic32_and_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection223: +; O3-NEXT: .Lpcsection238: ; O3-NEXT: lock andl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6828,7 +6873,7 @@ define void @atomic32_or_seq_cst(ptr %a) { ; O1-LABEL: atomic32_or_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection224: +; O1-NEXT: .Lpcsection239: ; O1-NEXT: lock orl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6836,7 +6881,7 @@ define void @atomic32_or_seq_cst(ptr %a) { ; O2-LABEL: atomic32_or_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection224: +; O2-NEXT: .Lpcsection239: ; O2-NEXT: lock orl $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6844,7 +6889,7 @@ define void @atomic32_or_seq_cst(ptr %a) { ; O3-LABEL: atomic32_or_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection224: +; O3-NEXT: .Lpcsection239: ; O3-NEXT: lock orl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6867,7 +6912,7 @@ define void @atomic32_xor_seq_cst(ptr %a) { ; O1-LABEL: atomic32_xor_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection225: +; O1-NEXT: .Lpcsection240: ; O1-NEXT: lock xorl $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -6875,7 +6920,7 @@ define void @atomic32_xor_seq_cst(ptr %a) { ; O2-LABEL: atomic32_xor_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection225: +; O2-NEXT: .Lpcsection240: ; O2-NEXT: lock xorl $42, (%rdi) ; 
O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -6883,7 +6928,7 @@ define void @atomic32_xor_seq_cst(ptr %a) { ; O3-LABEL: atomic32_xor_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection225: +; O3-NEXT: .Lpcsection240: ; O3-NEXT: lock xorl $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -6928,19 +6973,19 @@ define void @atomic32_nand_seq_cst(ptr %a) { ; O1-LABEL: atomic32_nand_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection226: +; O1-NEXT: .Lpcsection241: ; O1-NEXT: movl (%rdi), %eax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB140_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection227: +; O1-NEXT: .Lpcsection242: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection228: +; O1-NEXT: .Lpcsection243: ; O1-NEXT: orl $-43, %ecx -; O1-NEXT: .Lpcsection229: +; O1-NEXT: .Lpcsection244: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) -; O1-NEXT: .Lpcsection230: +; O1-NEXT: .Lpcsection245: ; O1-NEXT: jne .LBB140_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -6949,19 +6994,19 @@ define void @atomic32_nand_seq_cst(ptr %a) { ; O2-LABEL: atomic32_nand_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection226: +; O2-NEXT: .Lpcsection241: ; O2-NEXT: movl (%rdi), %eax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB140_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection227: +; O2-NEXT: .Lpcsection242: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection228: +; O2-NEXT: .Lpcsection243: ; O2-NEXT: orl $-43, %ecx -; O2-NEXT: .Lpcsection229: +; O2-NEXT: .Lpcsection244: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) -; O2-NEXT: .Lpcsection230: +; O2-NEXT: .Lpcsection245: ; O2-NEXT: jne .LBB140_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -6970,19 +7015,19 @@ define void @atomic32_nand_seq_cst(ptr %a) { ; O3-LABEL: 
atomic32_nand_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection226: +; O3-NEXT: .Lpcsection241: ; O3-NEXT: movl (%rdi), %eax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB140_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection227: +; O3-NEXT: .Lpcsection242: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection228: +; O3-NEXT: .Lpcsection243: ; O3-NEXT: orl $-43, %ecx -; O3-NEXT: .Lpcsection229: +; O3-NEXT: .Lpcsection244: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) -; O3-NEXT: .Lpcsection230: +; O3-NEXT: .Lpcsection245: ; O3-NEXT: jne .LBB140_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -7020,14 +7065,17 @@ define void @atomic32_cas_monotonic(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $1, %ecx +; O1-NEXT: .Lpcsection246: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection231: +; O1-NEXT: .Lpcsection247: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) +; O1-NEXT: .Lpcsection248: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection232: +; O1-NEXT: .Lpcsection249: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) +; O1-NEXT: .Lpcsection250: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection233: +; O1-NEXT: .Lpcsection251: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7036,14 +7084,17 @@ define void @atomic32_cas_monotonic(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $1, %ecx +; O2-NEXT: .Lpcsection246: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection231: +; O2-NEXT: .Lpcsection247: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) +; O2-NEXT: .Lpcsection248: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection232: +; O2-NEXT: .Lpcsection249: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) +; O2-NEXT: .Lpcsection250: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection233: +; O2-NEXT: .Lpcsection251: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ 
-7052,14 +7103,17 @@ define void @atomic32_cas_monotonic(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $1, %ecx +; O3-NEXT: .Lpcsection246: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection231: +; O3-NEXT: .Lpcsection247: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) +; O3-NEXT: .Lpcsection248: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection232: +; O3-NEXT: .Lpcsection249: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) +; O3-NEXT: .Lpcsection250: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection233: +; O3-NEXT: .Lpcsection251: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7098,14 +7152,17 @@ define void @atomic32_cas_acquire(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $1, %ecx +; O1-NEXT: .Lpcsection252: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection234: +; O1-NEXT: .Lpcsection253: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) +; O1-NEXT: .Lpcsection254: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection235: +; O1-NEXT: .Lpcsection255: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) +; O1-NEXT: .Lpcsection256: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection236: +; O1-NEXT: .Lpcsection257: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7114,14 +7171,17 @@ define void @atomic32_cas_acquire(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $1, %ecx +; O2-NEXT: .Lpcsection252: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection234: +; O2-NEXT: .Lpcsection253: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) +; O2-NEXT: .Lpcsection254: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection235: +; O2-NEXT: .Lpcsection255: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) +; O2-NEXT: .Lpcsection256: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection236: +; O2-NEXT: .Lpcsection257: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7130,14 +7190,17 @@ define void @atomic32_cas_acquire(ptr %a) { ; O3: # %bb.0: 
# %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $1, %ecx +; O3-NEXT: .Lpcsection252: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection234: +; O3-NEXT: .Lpcsection253: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) +; O3-NEXT: .Lpcsection254: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection235: +; O3-NEXT: .Lpcsection255: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) +; O3-NEXT: .Lpcsection256: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection236: +; O3-NEXT: .Lpcsection257: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7176,14 +7239,17 @@ define void @atomic32_cas_release(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $1, %ecx +; O1-NEXT: .Lpcsection258: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection237: +; O1-NEXT: .Lpcsection259: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) +; O1-NEXT: .Lpcsection260: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection238: +; O1-NEXT: .Lpcsection261: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) +; O1-NEXT: .Lpcsection262: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection239: +; O1-NEXT: .Lpcsection263: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7192,14 +7258,17 @@ define void @atomic32_cas_release(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $1, %ecx +; O2-NEXT: .Lpcsection258: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection237: +; O2-NEXT: .Lpcsection259: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) +; O2-NEXT: .Lpcsection260: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection238: +; O2-NEXT: .Lpcsection261: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) +; O2-NEXT: .Lpcsection262: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection239: +; O2-NEXT: .Lpcsection263: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7208,14 +7277,17 @@ define void @atomic32_cas_release(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $1, %ecx +; O3-NEXT: 
.Lpcsection258: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection237: +; O3-NEXT: .Lpcsection259: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) +; O3-NEXT: .Lpcsection260: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection238: +; O3-NEXT: .Lpcsection261: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) +; O3-NEXT: .Lpcsection262: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection239: +; O3-NEXT: .Lpcsection263: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7254,14 +7326,17 @@ define void @atomic32_cas_acq_rel(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $1, %ecx +; O1-NEXT: .Lpcsection264: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection240: +; O1-NEXT: .Lpcsection265: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) +; O1-NEXT: .Lpcsection266: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection241: +; O1-NEXT: .Lpcsection267: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) +; O1-NEXT: .Lpcsection268: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection242: +; O1-NEXT: .Lpcsection269: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7270,14 +7345,17 @@ define void @atomic32_cas_acq_rel(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $1, %ecx +; O2-NEXT: .Lpcsection264: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection240: +; O2-NEXT: .Lpcsection265: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) +; O2-NEXT: .Lpcsection266: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection241: +; O2-NEXT: .Lpcsection267: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) +; O2-NEXT: .Lpcsection268: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection242: +; O2-NEXT: .Lpcsection269: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7286,14 +7364,17 @@ define void @atomic32_cas_acq_rel(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $1, %ecx +; O3-NEXT: .Lpcsection264: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection240: +; O3-NEXT: 
.Lpcsection265: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) +; O3-NEXT: .Lpcsection266: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection241: +; O3-NEXT: .Lpcsection267: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) +; O3-NEXT: .Lpcsection268: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection242: +; O3-NEXT: .Lpcsection269: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7332,14 +7413,17 @@ define void @atomic32_cas_seq_cst(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $1, %ecx +; O1-NEXT: .Lpcsection270: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection243: +; O1-NEXT: .Lpcsection271: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) +; O1-NEXT: .Lpcsection272: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection244: +; O1-NEXT: .Lpcsection273: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) +; O1-NEXT: .Lpcsection274: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection245: +; O1-NEXT: .Lpcsection275: ; O1-NEXT: lock cmpxchgl %ecx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7348,14 +7432,17 @@ define void @atomic32_cas_seq_cst(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $1, %ecx +; O2-NEXT: .Lpcsection270: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection243: +; O2-NEXT: .Lpcsection271: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) +; O2-NEXT: .Lpcsection272: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection244: +; O2-NEXT: .Lpcsection273: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) +; O2-NEXT: .Lpcsection274: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection245: +; O2-NEXT: .Lpcsection275: ; O2-NEXT: lock cmpxchgl %ecx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7364,14 +7451,17 @@ define void @atomic32_cas_seq_cst(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $1, %ecx +; O3-NEXT: .Lpcsection270: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection243: +; O3-NEXT: .Lpcsection271: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) +; O3-NEXT: .Lpcsection272: 
; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection244: +; O3-NEXT: .Lpcsection273: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) +; O3-NEXT: .Lpcsection274: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection245: +; O3-NEXT: .Lpcsection275: ; O3-NEXT: lock cmpxchgl %ecx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7396,7 +7486,7 @@ define i64 @atomic64_load_unordered(ptr %a) { ; O1-LABEL: atomic64_load_unordered: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection246: +; O1-NEXT: .Lpcsection276: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7404,7 +7494,7 @@ define i64 @atomic64_load_unordered(ptr %a) { ; O2-LABEL: atomic64_load_unordered: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection246: +; O2-NEXT: .Lpcsection276: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7412,7 +7502,7 @@ define i64 @atomic64_load_unordered(ptr %a) { ; O3-LABEL: atomic64_load_unordered: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection246: +; O3-NEXT: .Lpcsection276: ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7435,7 +7525,7 @@ define i64 @atomic64_load_monotonic(ptr %a) { ; O1-LABEL: atomic64_load_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection247: +; O1-NEXT: .Lpcsection277: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7443,7 +7533,7 @@ define i64 @atomic64_load_monotonic(ptr %a) { ; O2-LABEL: atomic64_load_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection247: +; O2-NEXT: .Lpcsection277: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7451,7 +7541,7 @@ define i64 @atomic64_load_monotonic(ptr %a) { ; O3-LABEL: atomic64_load_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection247: +; O3-NEXT: .Lpcsection277: ; 
O3-NEXT: movq (%rdi), %rax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7474,7 +7564,7 @@ define i64 @atomic64_load_acquire(ptr %a) { ; O1-LABEL: atomic64_load_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection248: +; O1-NEXT: .Lpcsection278: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7482,7 +7572,7 @@ define i64 @atomic64_load_acquire(ptr %a) { ; O2-LABEL: atomic64_load_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection248: +; O2-NEXT: .Lpcsection278: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7490,7 +7580,7 @@ define i64 @atomic64_load_acquire(ptr %a) { ; O3-LABEL: atomic64_load_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection248: +; O3-NEXT: .Lpcsection278: ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7513,7 +7603,7 @@ define i64 @atomic64_load_seq_cst(ptr %a) { ; O1-LABEL: atomic64_load_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection249: +; O1-NEXT: .Lpcsection279: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7521,7 +7611,7 @@ define i64 @atomic64_load_seq_cst(ptr %a) { ; O2-LABEL: atomic64_load_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection249: +; O2-NEXT: .Lpcsection279: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7529,7 +7619,7 @@ define i64 @atomic64_load_seq_cst(ptr %a) { ; O3-LABEL: atomic64_load_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection249: +; O3-NEXT: .Lpcsection279: ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7552,7 +7642,7 @@ define ptr @atomic64_load_seq_cst_ptr_ty(ptr %a) { ; O1-LABEL: atomic64_load_seq_cst_ptr_ty: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection250: 
+; O1-NEXT: .Lpcsection280: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7560,7 +7650,7 @@ define ptr @atomic64_load_seq_cst_ptr_ty(ptr %a) { ; O2-LABEL: atomic64_load_seq_cst_ptr_ty: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection250: +; O2-NEXT: .Lpcsection280: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7568,7 +7658,7 @@ define ptr @atomic64_load_seq_cst_ptr_ty(ptr %a) { ; O3-LABEL: atomic64_load_seq_cst_ptr_ty: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection250: +; O3-NEXT: .Lpcsection280: ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7591,7 +7681,7 @@ define void @atomic64_store_unordered(ptr %a) { ; O1-LABEL: atomic64_store_unordered: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection251: +; O1-NEXT: .Lpcsection281: ; O1-NEXT: movq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7599,7 +7689,7 @@ define void @atomic64_store_unordered(ptr %a) { ; O2-LABEL: atomic64_store_unordered: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection251: +; O2-NEXT: .Lpcsection281: ; O2-NEXT: movq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7607,7 +7697,7 @@ define void @atomic64_store_unordered(ptr %a) { ; O3-LABEL: atomic64_store_unordered: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection251: +; O3-NEXT: .Lpcsection281: ; O3-NEXT: movq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7630,7 +7720,7 @@ define void @atomic64_store_monotonic(ptr %a) { ; O1-LABEL: atomic64_store_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection252: +; O1-NEXT: .Lpcsection282: ; O1-NEXT: movq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7638,7 +7728,7 @@ define void @atomic64_store_monotonic(ptr %a) { ; O2-LABEL: atomic64_store_monotonic: ; O2: # 
%bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection252: +; O2-NEXT: .Lpcsection282: ; O2-NEXT: movq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7646,7 +7736,7 @@ define void @atomic64_store_monotonic(ptr %a) { ; O3-LABEL: atomic64_store_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection252: +; O3-NEXT: .Lpcsection282: ; O3-NEXT: movq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7669,7 +7759,7 @@ define void @atomic64_store_release(ptr %a) { ; O1-LABEL: atomic64_store_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection253: +; O1-NEXT: .Lpcsection283: ; O1-NEXT: movq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7677,7 +7767,7 @@ define void @atomic64_store_release(ptr %a) { ; O2-LABEL: atomic64_store_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection253: +; O2-NEXT: .Lpcsection283: ; O2-NEXT: movq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7685,7 +7775,7 @@ define void @atomic64_store_release(ptr %a) { ; O3-LABEL: atomic64_store_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection253: +; O3-NEXT: .Lpcsection283: ; O3-NEXT: movq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7710,7 +7800,7 @@ define void @atomic64_store_seq_cst(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection254: +; O1-NEXT: .Lpcsection284: ; O1-NEXT: xchgq %rax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7719,7 +7809,7 @@ define void @atomic64_store_seq_cst(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection254: +; O2-NEXT: .Lpcsection284: ; O2-NEXT: xchgq %rax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7728,7 +7818,7 @@ define void @atomic64_store_seq_cst(ptr %a) { ; O3: # %bb.0: # %entry ; 
O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection254: +; O3-NEXT: .Lpcsection284: ; O3-NEXT: xchgq %rax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7751,7 +7841,7 @@ define void @atomic64_store_seq_cst_ptr_ty(ptr %a, ptr %v) { ; O1-LABEL: atomic64_store_seq_cst_ptr_ty: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection255: +; O1-NEXT: .Lpcsection285: ; O1-NEXT: xchgq %rsi, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7759,7 +7849,7 @@ define void @atomic64_store_seq_cst_ptr_ty(ptr %a, ptr %v) { ; O2-LABEL: atomic64_store_seq_cst_ptr_ty: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection255: +; O2-NEXT: .Lpcsection285: ; O2-NEXT: xchgq %rsi, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7767,7 +7857,7 @@ define void @atomic64_store_seq_cst_ptr_ty(ptr %a, ptr %v) { ; O3-LABEL: atomic64_store_seq_cst_ptr_ty: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection255: +; O3-NEXT: .Lpcsection285: ; O3-NEXT: xchgq %rsi, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7792,7 +7882,7 @@ define void @atomic64_xchg_monotonic(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection256: +; O1-NEXT: .Lpcsection286: ; O1-NEXT: xchgq %rax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7801,7 +7891,7 @@ define void @atomic64_xchg_monotonic(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection256: +; O2-NEXT: .Lpcsection286: ; O2-NEXT: xchgq %rax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7810,7 +7900,7 @@ define void @atomic64_xchg_monotonic(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection256: +; O3-NEXT: .Lpcsection286: ; O3-NEXT: xchgq %rax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7833,7 +7923,7 
@@ define void @atomic64_add_monotonic(ptr %a) { ; O1-LABEL: atomic64_add_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection257: +; O1-NEXT: .Lpcsection287: ; O1-NEXT: lock addq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7841,7 +7931,7 @@ define void @atomic64_add_monotonic(ptr %a) { ; O2-LABEL: atomic64_add_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection257: +; O2-NEXT: .Lpcsection287: ; O2-NEXT: lock addq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7849,7 +7939,7 @@ define void @atomic64_add_monotonic(ptr %a) { ; O3-LABEL: atomic64_add_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection257: +; O3-NEXT: .Lpcsection287: ; O3-NEXT: lock addq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7872,7 +7962,7 @@ define void @atomic64_sub_monotonic(ptr %a) { ; O1-LABEL: atomic64_sub_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection258: +; O1-NEXT: .Lpcsection288: ; O1-NEXT: lock subq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7880,7 +7970,7 @@ define void @atomic64_sub_monotonic(ptr %a) { ; O2-LABEL: atomic64_sub_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection258: +; O2-NEXT: .Lpcsection288: ; O2-NEXT: lock subq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7888,7 +7978,7 @@ define void @atomic64_sub_monotonic(ptr %a) { ; O3-LABEL: atomic64_sub_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection258: +; O3-NEXT: .Lpcsection288: ; O3-NEXT: lock subq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7911,7 +8001,7 @@ define void @atomic64_and_monotonic(ptr %a) { ; O1-LABEL: atomic64_and_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection259: +; O1-NEXT: .Lpcsection289: ; O1-NEXT: lock andq $42, (%rdi) 
; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7919,7 +8009,7 @@ define void @atomic64_and_monotonic(ptr %a) { ; O2-LABEL: atomic64_and_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection259: +; O2-NEXT: .Lpcsection289: ; O2-NEXT: lock andq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7927,7 +8017,7 @@ define void @atomic64_and_monotonic(ptr %a) { ; O3-LABEL: atomic64_and_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection259: +; O3-NEXT: .Lpcsection289: ; O3-NEXT: lock andq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7950,7 +8040,7 @@ define void @atomic64_or_monotonic(ptr %a) { ; O1-LABEL: atomic64_or_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection260: +; O1-NEXT: .Lpcsection290: ; O1-NEXT: lock orq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7958,7 +8048,7 @@ define void @atomic64_or_monotonic(ptr %a) { ; O2-LABEL: atomic64_or_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection260: +; O2-NEXT: .Lpcsection290: ; O2-NEXT: lock orq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -7966,7 +8056,7 @@ define void @atomic64_or_monotonic(ptr %a) { ; O3-LABEL: atomic64_or_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection260: +; O3-NEXT: .Lpcsection290: ; O3-NEXT: lock orq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -7989,7 +8079,7 @@ define void @atomic64_xor_monotonic(ptr %a) { ; O1-LABEL: atomic64_xor_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection261: +; O1-NEXT: .Lpcsection291: ; O1-NEXT: lock xorq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -7997,7 +8087,7 @@ define void @atomic64_xor_monotonic(ptr %a) { ; O2-LABEL: atomic64_xor_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection261: +; 
O2-NEXT: .Lpcsection291: ; O2-NEXT: lock xorq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8005,7 +8095,7 @@ define void @atomic64_xor_monotonic(ptr %a) { ; O3-LABEL: atomic64_xor_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection261: +; O3-NEXT: .Lpcsection291: ; O3-NEXT: lock xorq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8053,19 +8143,19 @@ define void @atomic64_nand_monotonic(ptr %a) { ; O1-LABEL: atomic64_nand_monotonic: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection262: +; O1-NEXT: .Lpcsection292: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB162_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection263: +; O1-NEXT: .Lpcsection293: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection264: +; O1-NEXT: .Lpcsection294: ; O1-NEXT: orq $-43, %rcx -; O1-NEXT: .Lpcsection265: +; O1-NEXT: .Lpcsection295: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) -; O1-NEXT: .Lpcsection266: +; O1-NEXT: .Lpcsection296: ; O1-NEXT: jne .LBB162_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -8074,19 +8164,19 @@ define void @atomic64_nand_monotonic(ptr %a) { ; O2-LABEL: atomic64_nand_monotonic: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection262: +; O2-NEXT: .Lpcsection292: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB162_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection263: +; O2-NEXT: .Lpcsection293: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection264: +; O2-NEXT: .Lpcsection294: ; O2-NEXT: orq $-43, %rcx -; O2-NEXT: .Lpcsection265: +; O2-NEXT: .Lpcsection295: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) -; O2-NEXT: .Lpcsection266: +; O2-NEXT: .Lpcsection296: ; O2-NEXT: jne .LBB162_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -8095,19 
+8185,19 @@ define void @atomic64_nand_monotonic(ptr %a) { ; O3-LABEL: atomic64_nand_monotonic: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection262: +; O3-NEXT: .Lpcsection292: ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB162_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection263: +; O3-NEXT: .Lpcsection293: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection264: +; O3-NEXT: .Lpcsection294: ; O3-NEXT: orq $-43, %rcx -; O3-NEXT: .Lpcsection265: +; O3-NEXT: .Lpcsection295: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) -; O3-NEXT: .Lpcsection266: +; O3-NEXT: .Lpcsection296: ; O3-NEXT: jne .LBB162_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -8133,7 +8223,7 @@ define void @atomic64_xchg_acquire(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection267: +; O1-NEXT: .Lpcsection297: ; O1-NEXT: xchgq %rax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8142,7 +8232,7 @@ define void @atomic64_xchg_acquire(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection267: +; O2-NEXT: .Lpcsection297: ; O2-NEXT: xchgq %rax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8151,7 +8241,7 @@ define void @atomic64_xchg_acquire(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection267: +; O3-NEXT: .Lpcsection297: ; O3-NEXT: xchgq %rax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8174,7 +8264,7 @@ define void @atomic64_add_acquire(ptr %a) { ; O1-LABEL: atomic64_add_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection268: +; O1-NEXT: .Lpcsection298: ; O1-NEXT: lock addq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8182,7 +8272,7 @@ define void @atomic64_add_acquire(ptr %a) { ; O2-LABEL: 
atomic64_add_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection268: +; O2-NEXT: .Lpcsection298: ; O2-NEXT: lock addq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8190,7 +8280,7 @@ define void @atomic64_add_acquire(ptr %a) { ; O3-LABEL: atomic64_add_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection268: +; O3-NEXT: .Lpcsection298: ; O3-NEXT: lock addq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8213,7 +8303,7 @@ define void @atomic64_sub_acquire(ptr %a) { ; O1-LABEL: atomic64_sub_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection269: +; O1-NEXT: .Lpcsection299: ; O1-NEXT: lock subq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8221,7 +8311,7 @@ define void @atomic64_sub_acquire(ptr %a) { ; O2-LABEL: atomic64_sub_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection269: +; O2-NEXT: .Lpcsection299: ; O2-NEXT: lock subq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8229,7 +8319,7 @@ define void @atomic64_sub_acquire(ptr %a) { ; O3-LABEL: atomic64_sub_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection269: +; O3-NEXT: .Lpcsection299: ; O3-NEXT: lock subq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8252,7 +8342,7 @@ define void @atomic64_and_acquire(ptr %a) { ; O1-LABEL: atomic64_and_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection270: +; O1-NEXT: .Lpcsection300: ; O1-NEXT: lock andq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8260,7 +8350,7 @@ define void @atomic64_and_acquire(ptr %a) { ; O2-LABEL: atomic64_and_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection270: +; O2-NEXT: .Lpcsection300: ; O2-NEXT: lock andq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8268,7 +8358,7 @@ define void 
@atomic64_and_acquire(ptr %a) { ; O3-LABEL: atomic64_and_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection270: +; O3-NEXT: .Lpcsection300: ; O3-NEXT: lock andq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8291,7 +8381,7 @@ define void @atomic64_or_acquire(ptr %a) { ; O1-LABEL: atomic64_or_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection271: +; O1-NEXT: .Lpcsection301: ; O1-NEXT: lock orq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8299,7 +8389,7 @@ define void @atomic64_or_acquire(ptr %a) { ; O2-LABEL: atomic64_or_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection271: +; O2-NEXT: .Lpcsection301: ; O2-NEXT: lock orq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8307,7 +8397,7 @@ define void @atomic64_or_acquire(ptr %a) { ; O3-LABEL: atomic64_or_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection271: +; O3-NEXT: .Lpcsection301: ; O3-NEXT: lock orq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8330,7 +8420,7 @@ define void @atomic64_xor_acquire(ptr %a) { ; O1-LABEL: atomic64_xor_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection272: +; O1-NEXT: .Lpcsection302: ; O1-NEXT: lock xorq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8338,7 +8428,7 @@ define void @atomic64_xor_acquire(ptr %a) { ; O2-LABEL: atomic64_xor_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection272: +; O2-NEXT: .Lpcsection302: ; O2-NEXT: lock xorq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8346,7 +8436,7 @@ define void @atomic64_xor_acquire(ptr %a) { ; O3-LABEL: atomic64_xor_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection272: +; O3-NEXT: .Lpcsection302: ; O3-NEXT: lock xorq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ 
-8394,19 +8484,19 @@ define void @atomic64_nand_acquire(ptr %a) { ; O1-LABEL: atomic64_nand_acquire: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection273: +; O1-NEXT: .Lpcsection303: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB169_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection274: +; O1-NEXT: .Lpcsection304: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection275: +; O1-NEXT: .Lpcsection305: ; O1-NEXT: orq $-43, %rcx -; O1-NEXT: .Lpcsection276: +; O1-NEXT: .Lpcsection306: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) -; O1-NEXT: .Lpcsection277: +; O1-NEXT: .Lpcsection307: ; O1-NEXT: jne .LBB169_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -8415,19 +8505,19 @@ define void @atomic64_nand_acquire(ptr %a) { ; O2-LABEL: atomic64_nand_acquire: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection273: +; O2-NEXT: .Lpcsection303: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB169_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection274: +; O2-NEXT: .Lpcsection304: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection275: +; O2-NEXT: .Lpcsection305: ; O2-NEXT: orq $-43, %rcx -; O2-NEXT: .Lpcsection276: +; O2-NEXT: .Lpcsection306: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) -; O2-NEXT: .Lpcsection277: +; O2-NEXT: .Lpcsection307: ; O2-NEXT: jne .LBB169_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -8436,19 +8526,19 @@ define void @atomic64_nand_acquire(ptr %a) { ; O3-LABEL: atomic64_nand_acquire: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection273: +; O3-NEXT: .Lpcsection303: ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB169_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection274: +; 
O3-NEXT: .Lpcsection304: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection275: +; O3-NEXT: .Lpcsection305: ; O3-NEXT: orq $-43, %rcx -; O3-NEXT: .Lpcsection276: +; O3-NEXT: .Lpcsection306: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) -; O3-NEXT: .Lpcsection277: +; O3-NEXT: .Lpcsection307: ; O3-NEXT: jne .LBB169_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -8474,7 +8564,7 @@ define void @atomic64_xchg_release(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection278: +; O1-NEXT: .Lpcsection308: ; O1-NEXT: xchgq %rax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8483,7 +8573,7 @@ define void @atomic64_xchg_release(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection278: +; O2-NEXT: .Lpcsection308: ; O2-NEXT: xchgq %rax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8492,7 +8582,7 @@ define void @atomic64_xchg_release(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection278: +; O3-NEXT: .Lpcsection308: ; O3-NEXT: xchgq %rax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8515,7 +8605,7 @@ define void @atomic64_add_release(ptr %a) { ; O1-LABEL: atomic64_add_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection279: +; O1-NEXT: .Lpcsection309: ; O1-NEXT: lock addq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8523,7 +8613,7 @@ define void @atomic64_add_release(ptr %a) { ; O2-LABEL: atomic64_add_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection279: +; O2-NEXT: .Lpcsection309: ; O2-NEXT: lock addq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8531,7 +8621,7 @@ define void @atomic64_add_release(ptr %a) { ; O3-LABEL: atomic64_add_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection279: +; O3-NEXT: 
.Lpcsection309: ; O3-NEXT: lock addq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8554,7 +8644,7 @@ define void @atomic64_sub_release(ptr %a) { ; O1-LABEL: atomic64_sub_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection280: +; O1-NEXT: .Lpcsection310: ; O1-NEXT: lock subq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8562,7 +8652,7 @@ define void @atomic64_sub_release(ptr %a) { ; O2-LABEL: atomic64_sub_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection280: +; O2-NEXT: .Lpcsection310: ; O2-NEXT: lock subq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8570,7 +8660,7 @@ define void @atomic64_sub_release(ptr %a) { ; O3-LABEL: atomic64_sub_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection280: +; O3-NEXT: .Lpcsection310: ; O3-NEXT: lock subq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8593,7 +8683,7 @@ define void @atomic64_and_release(ptr %a) { ; O1-LABEL: atomic64_and_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection281: +; O1-NEXT: .Lpcsection311: ; O1-NEXT: lock andq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8601,7 +8691,7 @@ define void @atomic64_and_release(ptr %a) { ; O2-LABEL: atomic64_and_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection281: +; O2-NEXT: .Lpcsection311: ; O2-NEXT: lock andq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8609,7 +8699,7 @@ define void @atomic64_and_release(ptr %a) { ; O3-LABEL: atomic64_and_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection281: +; O3-NEXT: .Lpcsection311: ; O3-NEXT: lock andq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8632,7 +8722,7 @@ define void @atomic64_or_release(ptr %a) { ; O1-LABEL: atomic64_or_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; 
O1-NEXT: .Lpcsection282: +; O1-NEXT: .Lpcsection312: ; O1-NEXT: lock orq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8640,7 +8730,7 @@ define void @atomic64_or_release(ptr %a) { ; O2-LABEL: atomic64_or_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection282: +; O2-NEXT: .Lpcsection312: ; O2-NEXT: lock orq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8648,7 +8738,7 @@ define void @atomic64_or_release(ptr %a) { ; O3-LABEL: atomic64_or_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection282: +; O3-NEXT: .Lpcsection312: ; O3-NEXT: lock orq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8671,7 +8761,7 @@ define void @atomic64_xor_release(ptr %a) { ; O1-LABEL: atomic64_xor_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection283: +; O1-NEXT: .Lpcsection313: ; O1-NEXT: lock xorq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8679,7 +8769,7 @@ define void @atomic64_xor_release(ptr %a) { ; O2-LABEL: atomic64_xor_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection283: +; O2-NEXT: .Lpcsection313: ; O2-NEXT: lock xorq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8687,7 +8777,7 @@ define void @atomic64_xor_release(ptr %a) { ; O3-LABEL: atomic64_xor_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection283: +; O3-NEXT: .Lpcsection313: ; O3-NEXT: lock xorq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8735,19 +8825,19 @@ define void @atomic64_nand_release(ptr %a) { ; O1-LABEL: atomic64_nand_release: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection284: +; O1-NEXT: .Lpcsection314: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB176_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection285: +; 
O1-NEXT: .Lpcsection315: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection286: +; O1-NEXT: .Lpcsection316: ; O1-NEXT: orq $-43, %rcx -; O1-NEXT: .Lpcsection287: +; O1-NEXT: .Lpcsection317: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) -; O1-NEXT: .Lpcsection288: +; O1-NEXT: .Lpcsection318: ; O1-NEXT: jne .LBB176_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -8756,19 +8846,19 @@ define void @atomic64_nand_release(ptr %a) { ; O2-LABEL: atomic64_nand_release: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection284: +; O2-NEXT: .Lpcsection314: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB176_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection285: +; O2-NEXT: .Lpcsection315: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection286: +; O2-NEXT: .Lpcsection316: ; O2-NEXT: orq $-43, %rcx -; O2-NEXT: .Lpcsection287: +; O2-NEXT: .Lpcsection317: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) -; O2-NEXT: .Lpcsection288: +; O2-NEXT: .Lpcsection318: ; O2-NEXT: jne .LBB176_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -8777,19 +8867,19 @@ define void @atomic64_nand_release(ptr %a) { ; O3-LABEL: atomic64_nand_release: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection284: +; O3-NEXT: .Lpcsection314: ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB176_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection285: +; O3-NEXT: .Lpcsection315: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection286: +; O3-NEXT: .Lpcsection316: ; O3-NEXT: orq $-43, %rcx -; O3-NEXT: .Lpcsection287: +; O3-NEXT: .Lpcsection317: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) -; O3-NEXT: .Lpcsection288: +; O3-NEXT: .Lpcsection318: ; O3-NEXT: jne .LBB176_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -8815,7 +8905,7 @@ define void 
@atomic64_xchg_acq_rel(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection289: +; O1-NEXT: .Lpcsection319: ; O1-NEXT: xchgq %rax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8824,7 +8914,7 @@ define void @atomic64_xchg_acq_rel(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection289: +; O2-NEXT: .Lpcsection319: ; O2-NEXT: xchgq %rax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8833,7 +8923,7 @@ define void @atomic64_xchg_acq_rel(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection289: +; O3-NEXT: .Lpcsection319: ; O3-NEXT: xchgq %rax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8856,7 +8946,7 @@ define void @atomic64_add_acq_rel(ptr %a) { ; O1-LABEL: atomic64_add_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection290: +; O1-NEXT: .Lpcsection320: ; O1-NEXT: lock addq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8864,7 +8954,7 @@ define void @atomic64_add_acq_rel(ptr %a) { ; O2-LABEL: atomic64_add_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection290: +; O2-NEXT: .Lpcsection320: ; O2-NEXT: lock addq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8872,7 +8962,7 @@ define void @atomic64_add_acq_rel(ptr %a) { ; O3-LABEL: atomic64_add_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection290: +; O3-NEXT: .Lpcsection320: ; O3-NEXT: lock addq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8895,7 +8985,7 @@ define void @atomic64_sub_acq_rel(ptr %a) { ; O1-LABEL: atomic64_sub_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection291: +; O1-NEXT: .Lpcsection321: ; O1-NEXT: lock subq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8903,7 +8993,7 @@ 
define void @atomic64_sub_acq_rel(ptr %a) { ; O2-LABEL: atomic64_sub_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection291: +; O2-NEXT: .Lpcsection321: ; O2-NEXT: lock subq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8911,7 +9001,7 @@ define void @atomic64_sub_acq_rel(ptr %a) { ; O3-LABEL: atomic64_sub_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection291: +; O3-NEXT: .Lpcsection321: ; O3-NEXT: lock subq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8934,7 +9024,7 @@ define void @atomic64_and_acq_rel(ptr %a) { ; O1-LABEL: atomic64_and_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection292: +; O1-NEXT: .Lpcsection322: ; O1-NEXT: lock andq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8942,7 +9032,7 @@ define void @atomic64_and_acq_rel(ptr %a) { ; O2-LABEL: atomic64_and_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection292: +; O2-NEXT: .Lpcsection322: ; O2-NEXT: lock andq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -8950,7 +9040,7 @@ define void @atomic64_and_acq_rel(ptr %a) { ; O3-LABEL: atomic64_and_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection292: +; O3-NEXT: .Lpcsection322: ; O3-NEXT: lock andq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -8973,7 +9063,7 @@ define void @atomic64_or_acq_rel(ptr %a) { ; O1-LABEL: atomic64_or_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection293: +; O1-NEXT: .Lpcsection323: ; O1-NEXT: lock orq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -8981,7 +9071,7 @@ define void @atomic64_or_acq_rel(ptr %a) { ; O2-LABEL: atomic64_or_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection293: +; O2-NEXT: .Lpcsection323: ; O2-NEXT: lock orq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; 
O2-NEXT: retq @@ -8989,7 +9079,7 @@ define void @atomic64_or_acq_rel(ptr %a) { ; O3-LABEL: atomic64_or_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection293: +; O3-NEXT: .Lpcsection323: ; O3-NEXT: lock orq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9012,7 +9102,7 @@ define void @atomic64_xor_acq_rel(ptr %a) { ; O1-LABEL: atomic64_xor_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection294: +; O1-NEXT: .Lpcsection324: ; O1-NEXT: lock xorq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9020,7 +9110,7 @@ define void @atomic64_xor_acq_rel(ptr %a) { ; O2-LABEL: atomic64_xor_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection294: +; O2-NEXT: .Lpcsection324: ; O2-NEXT: lock xorq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9028,7 +9118,7 @@ define void @atomic64_xor_acq_rel(ptr %a) { ; O3-LABEL: atomic64_xor_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection294: +; O3-NEXT: .Lpcsection324: ; O3-NEXT: lock xorq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9076,19 +9166,19 @@ define void @atomic64_nand_acq_rel(ptr %a) { ; O1-LABEL: atomic64_nand_acq_rel: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection295: +; O1-NEXT: .Lpcsection325: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB183_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection296: +; O1-NEXT: .Lpcsection326: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection297: +; O1-NEXT: .Lpcsection327: ; O1-NEXT: orq $-43, %rcx -; O1-NEXT: .Lpcsection298: +; O1-NEXT: .Lpcsection328: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) -; O1-NEXT: .Lpcsection299: +; O1-NEXT: .Lpcsection329: ; O1-NEXT: jne .LBB183_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -9097,19 +9187,19 @@ define void 
@atomic64_nand_acq_rel(ptr %a) { ; O2-LABEL: atomic64_nand_acq_rel: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection295: +; O2-NEXT: .Lpcsection325: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB183_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection296: +; O2-NEXT: .Lpcsection326: ; O2-NEXT: notl %ecx -; O2-NEXT: .Lpcsection297: +; O2-NEXT: .Lpcsection327: ; O2-NEXT: orq $-43, %rcx -; O2-NEXT: .Lpcsection298: +; O2-NEXT: .Lpcsection328: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) -; O2-NEXT: .Lpcsection299: +; O2-NEXT: .Lpcsection329: ; O2-NEXT: jne .LBB183_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -9118,19 +9208,19 @@ define void @atomic64_nand_acq_rel(ptr %a) { ; O3-LABEL: atomic64_nand_acq_rel: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection295: +; O3-NEXT: .Lpcsection325: ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB183_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection296: +; O3-NEXT: .Lpcsection326: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection297: +; O3-NEXT: .Lpcsection327: ; O3-NEXT: orq $-43, %rcx -; O3-NEXT: .Lpcsection298: +; O3-NEXT: .Lpcsection328: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) -; O3-NEXT: .Lpcsection299: +; O3-NEXT: .Lpcsection329: ; O3-NEXT: jne .LBB183_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -9156,7 +9246,7 @@ define void @atomic64_xchg_seq_cst(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection300: +; O1-NEXT: .Lpcsection330: ; O1-NEXT: xchgq %rax, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9165,7 +9255,7 @@ define void @atomic64_xchg_seq_cst(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection300: 
+; O2-NEXT: .Lpcsection330: ; O2-NEXT: xchgq %rax, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9174,7 +9264,7 @@ define void @atomic64_xchg_seq_cst(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection300: +; O3-NEXT: .Lpcsection330: ; O3-NEXT: xchgq %rax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9197,7 +9287,7 @@ define void @atomic64_add_seq_cst(ptr %a) { ; O1-LABEL: atomic64_add_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection301: +; O1-NEXT: .Lpcsection331: ; O1-NEXT: lock addq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9205,7 +9295,7 @@ define void @atomic64_add_seq_cst(ptr %a) { ; O2-LABEL: atomic64_add_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection301: +; O2-NEXT: .Lpcsection331: ; O2-NEXT: lock addq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9213,7 +9303,7 @@ define void @atomic64_add_seq_cst(ptr %a) { ; O3-LABEL: atomic64_add_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection301: +; O3-NEXT: .Lpcsection331: ; O3-NEXT: lock addq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9236,7 +9326,7 @@ define void @atomic64_sub_seq_cst(ptr %a) { ; O1-LABEL: atomic64_sub_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection302: +; O1-NEXT: .Lpcsection332: ; O1-NEXT: lock subq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9244,7 +9334,7 @@ define void @atomic64_sub_seq_cst(ptr %a) { ; O2-LABEL: atomic64_sub_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection302: +; O2-NEXT: .Lpcsection332: ; O2-NEXT: lock subq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9252,7 +9342,7 @@ define void @atomic64_sub_seq_cst(ptr %a) { ; O3-LABEL: atomic64_sub_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; 
O3-NEXT: .Lpcsection302: +; O3-NEXT: .Lpcsection332: ; O3-NEXT: lock subq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9275,7 +9365,7 @@ define void @atomic64_and_seq_cst(ptr %a) { ; O1-LABEL: atomic64_and_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection303: +; O1-NEXT: .Lpcsection333: ; O1-NEXT: lock andq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9283,7 +9373,7 @@ define void @atomic64_and_seq_cst(ptr %a) { ; O2-LABEL: atomic64_and_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection303: +; O2-NEXT: .Lpcsection333: ; O2-NEXT: lock andq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9291,7 +9381,7 @@ define void @atomic64_and_seq_cst(ptr %a) { ; O3-LABEL: atomic64_and_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection303: +; O3-NEXT: .Lpcsection333: ; O3-NEXT: lock andq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9314,7 +9404,7 @@ define void @atomic64_or_seq_cst(ptr %a) { ; O1-LABEL: atomic64_or_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection304: +; O1-NEXT: .Lpcsection334: ; O1-NEXT: lock orq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9322,7 +9412,7 @@ define void @atomic64_or_seq_cst(ptr %a) { ; O2-LABEL: atomic64_or_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection304: +; O2-NEXT: .Lpcsection334: ; O2-NEXT: lock orq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9330,7 +9420,7 @@ define void @atomic64_or_seq_cst(ptr %a) { ; O3-LABEL: atomic64_or_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection304: +; O3-NEXT: .Lpcsection334: ; O3-NEXT: lock orq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9353,7 +9443,7 @@ define void @atomic64_xor_seq_cst(ptr %a) { ; O1-LABEL: atomic64_xor_seq_cst: ; O1: # %bb.0: # %entry ; 
O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection305: +; O1-NEXT: .Lpcsection335: ; O1-NEXT: lock xorq $42, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9361,7 +9451,7 @@ define void @atomic64_xor_seq_cst(ptr %a) { ; O2-LABEL: atomic64_xor_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection305: +; O2-NEXT: .Lpcsection335: ; O2-NEXT: lock xorq $42, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9369,7 +9459,7 @@ define void @atomic64_xor_seq_cst(ptr %a) { ; O3-LABEL: atomic64_xor_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection305: +; O3-NEXT: .Lpcsection335: ; O3-NEXT: lock xorq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9417,19 +9507,19 @@ define void @atomic64_nand_seq_cst(ptr %a) { ; O1-LABEL: atomic64_nand_seq_cst: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection306: +; O1-NEXT: .Lpcsection336: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB190_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ecx -; O1-NEXT: .Lpcsection307: +; O1-NEXT: .Lpcsection337: ; O1-NEXT: notl %ecx -; O1-NEXT: .Lpcsection308: +; O1-NEXT: .Lpcsection338: ; O1-NEXT: orq $-43, %rcx -; O1-NEXT: .Lpcsection309: +; O1-NEXT: .Lpcsection339: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) -; O1-NEXT: .Lpcsection310: +; O1-NEXT: .Lpcsection340: ; O1-NEXT: jne .LBB190_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -9438,19 +9528,19 @@ define void @atomic64_nand_seq_cst(ptr %a) { ; O2-LABEL: atomic64_nand_seq_cst: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection306: +; O2-NEXT: .Lpcsection336: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB190_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ecx -; O2-NEXT: .Lpcsection307: +; O2-NEXT: .Lpcsection337: ; O2-NEXT: notl 
%ecx -; O2-NEXT: .Lpcsection308: +; O2-NEXT: .Lpcsection338: ; O2-NEXT: orq $-43, %rcx -; O2-NEXT: .Lpcsection309: +; O2-NEXT: .Lpcsection339: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) -; O2-NEXT: .Lpcsection310: +; O2-NEXT: .Lpcsection340: ; O2-NEXT: jne .LBB190_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -9459,19 +9549,19 @@ define void @atomic64_nand_seq_cst(ptr %a) { ; O3-LABEL: atomic64_nand_seq_cst: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection306: +; O3-NEXT: .Lpcsection336: ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB190_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ecx -; O3-NEXT: .Lpcsection307: +; O3-NEXT: .Lpcsection337: ; O3-NEXT: notl %ecx -; O3-NEXT: .Lpcsection308: +; O3-NEXT: .Lpcsection338: ; O3-NEXT: orq $-43, %rcx -; O3-NEXT: .Lpcsection309: +; O3-NEXT: .Lpcsection339: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) -; O3-NEXT: .Lpcsection310: +; O3-NEXT: .Lpcsection340: ; O3-NEXT: jne .LBB190_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -9509,14 +9599,17 @@ define void @atomic64_cas_monotonic(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $1, %ecx +; O1-NEXT: .Lpcsection341: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection311: +; O1-NEXT: .Lpcsection342: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) +; O1-NEXT: .Lpcsection343: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection312: +; O1-NEXT: .Lpcsection344: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) +; O1-NEXT: .Lpcsection345: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection313: +; O1-NEXT: .Lpcsection346: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9525,14 +9618,17 @@ define void @atomic64_cas_monotonic(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $1, %ecx +; O2-NEXT: .Lpcsection341: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection311: +; 
O2-NEXT: .Lpcsection342: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) +; O2-NEXT: .Lpcsection343: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection312: +; O2-NEXT: .Lpcsection344: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) +; O2-NEXT: .Lpcsection345: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection313: +; O2-NEXT: .Lpcsection346: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9541,14 +9637,17 @@ define void @atomic64_cas_monotonic(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $1, %ecx +; O3-NEXT: .Lpcsection341: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection311: +; O3-NEXT: .Lpcsection342: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; O3-NEXT: .Lpcsection343: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection312: +; O3-NEXT: .Lpcsection344: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; O3-NEXT: .Lpcsection345: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection313: +; O3-NEXT: .Lpcsection346: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9587,14 +9686,17 @@ define void @atomic64_cas_acquire(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $1, %ecx +; O1-NEXT: .Lpcsection347: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection314: +; O1-NEXT: .Lpcsection348: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) +; O1-NEXT: .Lpcsection349: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection315: +; O1-NEXT: .Lpcsection350: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) +; O1-NEXT: .Lpcsection351: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection316: +; O1-NEXT: .Lpcsection352: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9603,14 +9705,17 @@ define void @atomic64_cas_acquire(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $1, %ecx +; O2-NEXT: .Lpcsection347: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection314: +; O2-NEXT: .Lpcsection348: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) +; O2-NEXT: 
.Lpcsection349: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection315: +; O2-NEXT: .Lpcsection350: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) +; O2-NEXT: .Lpcsection351: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection316: +; O2-NEXT: .Lpcsection352: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9619,14 +9724,17 @@ define void @atomic64_cas_acquire(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $1, %ecx +; O3-NEXT: .Lpcsection347: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection314: +; O3-NEXT: .Lpcsection348: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; O3-NEXT: .Lpcsection349: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection315: +; O3-NEXT: .Lpcsection350: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; O3-NEXT: .Lpcsection351: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection316: +; O3-NEXT: .Lpcsection352: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9665,14 +9773,17 @@ define void @atomic64_cas_release(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $1, %ecx +; O1-NEXT: .Lpcsection353: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection317: +; O1-NEXT: .Lpcsection354: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) +; O1-NEXT: .Lpcsection355: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection318: +; O1-NEXT: .Lpcsection356: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) +; O1-NEXT: .Lpcsection357: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection319: +; O1-NEXT: .Lpcsection358: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9681,14 +9792,17 @@ define void @atomic64_cas_release(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $1, %ecx +; O2-NEXT: .Lpcsection353: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection317: +; O2-NEXT: .Lpcsection354: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) +; O2-NEXT: .Lpcsection355: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection318: +; O2-NEXT: 
.Lpcsection356: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) +; O2-NEXT: .Lpcsection357: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection319: +; O2-NEXT: .Lpcsection358: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9697,14 +9811,17 @@ define void @atomic64_cas_release(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $1, %ecx +; O3-NEXT: .Lpcsection353: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection317: +; O3-NEXT: .Lpcsection354: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; O3-NEXT: .Lpcsection355: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection318: +; O3-NEXT: .Lpcsection356: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; O3-NEXT: .Lpcsection357: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection319: +; O3-NEXT: .Lpcsection358: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9743,14 +9860,17 @@ define void @atomic64_cas_acq_rel(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $1, %ecx +; O1-NEXT: .Lpcsection359: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection320: +; O1-NEXT: .Lpcsection360: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) +; O1-NEXT: .Lpcsection361: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection321: +; O1-NEXT: .Lpcsection362: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) +; O1-NEXT: .Lpcsection363: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection322: +; O1-NEXT: .Lpcsection364: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9759,14 +9879,17 @@ define void @atomic64_cas_acq_rel(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $1, %ecx +; O2-NEXT: .Lpcsection359: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection320: +; O2-NEXT: .Lpcsection360: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) +; O2-NEXT: .Lpcsection361: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection321: +; O2-NEXT: .Lpcsection362: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) +; O2-NEXT: .Lpcsection363: 
; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection322: +; O2-NEXT: .Lpcsection364: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9775,14 +9898,17 @@ define void @atomic64_cas_acq_rel(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $1, %ecx +; O3-NEXT: .Lpcsection359: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection320: +; O3-NEXT: .Lpcsection360: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; O3-NEXT: .Lpcsection361: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection321: +; O3-NEXT: .Lpcsection362: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; O3-NEXT: .Lpcsection363: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection322: +; O3-NEXT: .Lpcsection364: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9821,14 +9947,17 @@ define void @atomic64_cas_seq_cst(ptr %a) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax ; O1-NEXT: movl $1, %ecx +; O1-NEXT: .Lpcsection365: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection323: +; O1-NEXT: .Lpcsection366: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) +; O1-NEXT: .Lpcsection367: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection324: +; O1-NEXT: .Lpcsection368: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) +; O1-NEXT: .Lpcsection369: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection325: +; O1-NEXT: .Lpcsection370: ; O1-NEXT: lock cmpxchgq %rcx, (%rdi) ; O1-NEXT: movq $3, foo(%rip) ; O1-NEXT: retq @@ -9837,14 +9966,17 @@ define void @atomic64_cas_seq_cst(ptr %a) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax ; O2-NEXT: movl $1, %ecx +; O2-NEXT: .Lpcsection365: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection323: +; O2-NEXT: .Lpcsection366: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) +; O2-NEXT: .Lpcsection367: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection324: +; O2-NEXT: .Lpcsection368: ; O2-NEXT: lock cmpxchgq %rcx, (%rdi) +; O2-NEXT: .Lpcsection369: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection325: +; O2-NEXT: .Lpcsection370: 
; O2-NEXT: lock cmpxchgq %rcx, (%rdi) ; O2-NEXT: movq $3, foo(%rip) ; O2-NEXT: retq @@ -9853,14 +9985,17 @@ define void @atomic64_cas_seq_cst(ptr %a) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax ; O3-NEXT: movl $1, %ecx +; O3-NEXT: .Lpcsection365: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection323: +; O3-NEXT: .Lpcsection366: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; O3-NEXT: .Lpcsection367: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection324: +; O3-NEXT: .Lpcsection368: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; O3-NEXT: .Lpcsection369: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection325: +; O3-NEXT: .Lpcsection370: ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) ; O3-NEXT: movq $3, foo(%rip) ; O3-NEXT: retq @@ -9887,7 +10022,7 @@ define void @atomic64_cas_seq_cst_ptr_ty(ptr %a, ptr %v1, ptr %v2) { ; O1: # %bb.0: # %entry ; O1-NEXT: movq %rsi, %rax ; O1-NEXT: movq foo(%rip), %rcx -; O1-NEXT: .Lpcsection326: +; O1-NEXT: .Lpcsection371: ; O1-NEXT: lock cmpxchgq %rdx, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -9896,7 +10031,7 @@ define void @atomic64_cas_seq_cst_ptr_ty(ptr %a, ptr %v1, ptr %v2) { ; O2: # %bb.0: # %entry ; O2-NEXT: movq %rsi, %rax ; O2-NEXT: movq foo(%rip), %rcx -; O2-NEXT: .Lpcsection326: +; O2-NEXT: .Lpcsection371: ; O2-NEXT: lock cmpxchgq %rdx, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -9905,7 +10040,7 @@ define void @atomic64_cas_seq_cst_ptr_ty(ptr %a, ptr %v1, ptr %v2) { ; O3: # %bb.0: # %entry ; O3-NEXT: movq %rsi, %rax ; O3-NEXT: movq foo(%rip), %rcx -; O3-NEXT: .Lpcsection326: +; O3-NEXT: .Lpcsection371: ; O3-NEXT: lock cmpxchgq %rdx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -9934,7 +10069,7 @@ define i64 @atomic_use_cond(ptr %a) { ; ; O1-LABEL: atomic_use_cond: ; O1: # %bb.0: # %entry -; O1-NEXT: .Lpcsection327: +; O1-NEXT: .Lpcsection372: ; O1-NEXT: lock decq (%rdi) ; O1-NEXT: jne .LBB197_2 ; O1-NEXT: # %bb.1: # %then @@ -9946,7 +10081,7 @@ define i64 @atomic_use_cond(ptr %a) { ; ; O2-LABEL: 
atomic_use_cond: ; O2: # %bb.0: # %entry -; O2-NEXT: .Lpcsection327: +; O2-NEXT: .Lpcsection372: ; O2-NEXT: lock decq (%rdi) ; O2-NEXT: jne .LBB197_2 ; O2-NEXT: # %bb.1: # %then @@ -9958,7 +10093,7 @@ define i64 @atomic_use_cond(ptr %a) { ; ; O3-LABEL: atomic_use_cond: ; O3: # %bb.0: # %entry -; O3-NEXT: .Lpcsection327: +; O3-NEXT: .Lpcsection372: ; O3-NEXT: lock decq (%rdi) ; O3-NEXT: jne .LBB197_2 ; O3-NEXT: # %bb.1: # %then @@ -10005,15 +10140,15 @@ define i128 @atomic128_load_unordered(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection328: +; O1-NEXT: .Lpcsection373: ; O1-NEXT: xorl %eax, %eax -; O1-NEXT: .Lpcsection329: +; O1-NEXT: .Lpcsection374: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection330: +; O1-NEXT: .Lpcsection375: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection331: +; O1-NEXT: .Lpcsection376: ; O1-NEXT: xorl %ebx, %ebx -; O1-NEXT: .Lpcsection332: +; O1-NEXT: .Lpcsection377: ; O1-NEXT: lock cmpxchg16b (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: popq %rbx @@ -10026,15 +10161,15 @@ define i128 @atomic128_load_unordered(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection328: +; O2-NEXT: .Lpcsection373: ; O2-NEXT: xorl %eax, %eax -; O2-NEXT: .Lpcsection329: +; O2-NEXT: .Lpcsection374: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection330: +; O2-NEXT: .Lpcsection375: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection331: +; O2-NEXT: .Lpcsection376: ; O2-NEXT: xorl %ebx, %ebx -; O2-NEXT: .Lpcsection332: +; O2-NEXT: .Lpcsection377: ; O2-NEXT: lock cmpxchg16b (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: popq %rbx @@ -10047,15 +10182,15 @@ define i128 @atomic128_load_unordered(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection328: +; O3-NEXT: .Lpcsection373: ; O3-NEXT: xorl %eax, %eax -; O3-NEXT: 
.Lpcsection329: +; O3-NEXT: .Lpcsection374: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection330: +; O3-NEXT: .Lpcsection375: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection331: +; O3-NEXT: .Lpcsection376: ; O3-NEXT: xorl %ebx, %ebx -; O3-NEXT: .Lpcsection332: +; O3-NEXT: .Lpcsection377: ; O3-NEXT: lock cmpxchg16b (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: popq %rbx @@ -10094,15 +10229,15 @@ define i128 @atomic128_load_monotonic(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection333: +; O1-NEXT: .Lpcsection378: ; O1-NEXT: xorl %eax, %eax -; O1-NEXT: .Lpcsection334: +; O1-NEXT: .Lpcsection379: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection335: +; O1-NEXT: .Lpcsection380: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection336: +; O1-NEXT: .Lpcsection381: ; O1-NEXT: xorl %ebx, %ebx -; O1-NEXT: .Lpcsection337: +; O1-NEXT: .Lpcsection382: ; O1-NEXT: lock cmpxchg16b (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: popq %rbx @@ -10115,15 +10250,15 @@ define i128 @atomic128_load_monotonic(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection333: +; O2-NEXT: .Lpcsection378: ; O2-NEXT: xorl %eax, %eax -; O2-NEXT: .Lpcsection334: +; O2-NEXT: .Lpcsection379: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection335: +; O2-NEXT: .Lpcsection380: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection336: +; O2-NEXT: .Lpcsection381: ; O2-NEXT: xorl %ebx, %ebx -; O2-NEXT: .Lpcsection337: +; O2-NEXT: .Lpcsection382: ; O2-NEXT: lock cmpxchg16b (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: popq %rbx @@ -10136,15 +10271,15 @@ define i128 @atomic128_load_monotonic(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection333: +; O3-NEXT: .Lpcsection378: ; O3-NEXT: xorl %eax, %eax -; O3-NEXT: .Lpcsection334: +; O3-NEXT: .Lpcsection379: ; O3-NEXT: xorl 
%edx, %edx -; O3-NEXT: .Lpcsection335: +; O3-NEXT: .Lpcsection380: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection336: +; O3-NEXT: .Lpcsection381: ; O3-NEXT: xorl %ebx, %ebx -; O3-NEXT: .Lpcsection337: +; O3-NEXT: .Lpcsection382: ; O3-NEXT: lock cmpxchg16b (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: popq %rbx @@ -10183,15 +10318,15 @@ define i128 @atomic128_load_acquire(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection338: +; O1-NEXT: .Lpcsection383: ; O1-NEXT: xorl %eax, %eax -; O1-NEXT: .Lpcsection339: +; O1-NEXT: .Lpcsection384: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection340: +; O1-NEXT: .Lpcsection385: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection341: +; O1-NEXT: .Lpcsection386: ; O1-NEXT: xorl %ebx, %ebx -; O1-NEXT: .Lpcsection342: +; O1-NEXT: .Lpcsection387: ; O1-NEXT: lock cmpxchg16b (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: popq %rbx @@ -10204,15 +10339,15 @@ define i128 @atomic128_load_acquire(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection338: +; O2-NEXT: .Lpcsection383: ; O2-NEXT: xorl %eax, %eax -; O2-NEXT: .Lpcsection339: +; O2-NEXT: .Lpcsection384: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection340: +; O2-NEXT: .Lpcsection385: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection341: +; O2-NEXT: .Lpcsection386: ; O2-NEXT: xorl %ebx, %ebx -; O2-NEXT: .Lpcsection342: +; O2-NEXT: .Lpcsection387: ; O2-NEXT: lock cmpxchg16b (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: popq %rbx @@ -10225,15 +10360,15 @@ define i128 @atomic128_load_acquire(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection338: +; O3-NEXT: .Lpcsection383: ; O3-NEXT: xorl %eax, %eax -; O3-NEXT: .Lpcsection339: +; O3-NEXT: .Lpcsection384: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection340: +; O3-NEXT: .Lpcsection385: 
; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection341: +; O3-NEXT: .Lpcsection386: ; O3-NEXT: xorl %ebx, %ebx -; O3-NEXT: .Lpcsection342: +; O3-NEXT: .Lpcsection387: ; O3-NEXT: lock cmpxchg16b (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: popq %rbx @@ -10272,15 +10407,15 @@ define i128 @atomic128_load_seq_cst(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection343: +; O1-NEXT: .Lpcsection388: ; O1-NEXT: xorl %eax, %eax -; O1-NEXT: .Lpcsection344: +; O1-NEXT: .Lpcsection389: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection345: +; O1-NEXT: .Lpcsection390: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection346: +; O1-NEXT: .Lpcsection391: ; O1-NEXT: xorl %ebx, %ebx -; O1-NEXT: .Lpcsection347: +; O1-NEXT: .Lpcsection392: ; O1-NEXT: lock cmpxchg16b (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: popq %rbx @@ -10293,15 +10428,15 @@ define i128 @atomic128_load_seq_cst(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection343: +; O2-NEXT: .Lpcsection388: ; O2-NEXT: xorl %eax, %eax -; O2-NEXT: .Lpcsection344: +; O2-NEXT: .Lpcsection389: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection345: +; O2-NEXT: .Lpcsection390: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection346: +; O2-NEXT: .Lpcsection391: ; O2-NEXT: xorl %ebx, %ebx -; O2-NEXT: .Lpcsection347: +; O2-NEXT: .Lpcsection392: ; O2-NEXT: lock cmpxchg16b (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: popq %rbx @@ -10314,15 +10449,15 @@ define i128 @atomic128_load_seq_cst(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection343: +; O3-NEXT: .Lpcsection388: ; O3-NEXT: xorl %eax, %eax -; O3-NEXT: .Lpcsection344: +; O3-NEXT: .Lpcsection389: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection345: +; O3-NEXT: .Lpcsection390: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection346: +; O3-NEXT: 
.Lpcsection391: ; O3-NEXT: xorl %ebx, %ebx -; O3-NEXT: .Lpcsection347: +; O3-NEXT: .Lpcsection392: ; O3-NEXT: lock cmpxchg16b (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: popq %rbx @@ -10347,7 +10482,7 @@ define ptr @atomic128_load_seq_cst_ptr_ty(ptr %a) { ; O1-LABEL: atomic128_load_seq_cst_ptr_ty: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection348: +; O1-NEXT: .Lpcsection393: ; O1-NEXT: movq (%rdi), %rax ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -10355,7 +10490,7 @@ define ptr @atomic128_load_seq_cst_ptr_ty(ptr %a) { ; O2-LABEL: atomic128_load_seq_cst_ptr_ty: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection348: +; O2-NEXT: .Lpcsection393: ; O2-NEXT: movq (%rdi), %rax ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -10363,7 +10498,7 @@ define ptr @atomic128_load_seq_cst_ptr_ty(ptr %a) { ; O3-LABEL: atomic128_load_seq_cst_ptr_ty: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection348: +; O3-NEXT: .Lpcsection393: ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -10420,20 +10555,20 @@ define void @atomic128_store_unordered(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection349: +; O1-NEXT: .Lpcsection394: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection350: +; O1-NEXT: .Lpcsection395: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection351: +; O1-NEXT: .Lpcsection396: ; O1-NEXT: movl $42, %ebx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB203_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 -; O1-NEXT: .Lpcsection352: +; O1-NEXT: .Lpcsection397: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection353: +; O1-NEXT: .Lpcsection398: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection354: +; O1-NEXT: .Lpcsection399: ; O1-NEXT: jne .LBB203_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -10447,20 +10582,20 @@ define 
void @atomic128_store_unordered(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection349: +; O2-NEXT: .Lpcsection394: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection350: +; O2-NEXT: .Lpcsection395: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection351: +; O2-NEXT: .Lpcsection396: ; O2-NEXT: movl $42, %ebx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB203_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 -; O2-NEXT: .Lpcsection352: +; O2-NEXT: .Lpcsection397: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection353: +; O2-NEXT: .Lpcsection398: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection354: +; O2-NEXT: .Lpcsection399: ; O2-NEXT: jne .LBB203_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -10474,20 +10609,20 @@ define void @atomic128_store_unordered(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection349: +; O3-NEXT: .Lpcsection394: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection350: +; O3-NEXT: .Lpcsection395: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection351: +; O3-NEXT: .Lpcsection396: ; O3-NEXT: movl $42, %ebx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB203_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 -; O3-NEXT: .Lpcsection352: +; O3-NEXT: .Lpcsection397: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection353: +; O3-NEXT: .Lpcsection398: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection354: +; O3-NEXT: .Lpcsection399: ; O3-NEXT: jne .LBB203_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -10547,20 +10682,20 @@ define void @atomic128_store_monotonic(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection355: +; O1-NEXT: .Lpcsection400: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection356: +; O1-NEXT: 
.Lpcsection401: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection357: +; O1-NEXT: .Lpcsection402: ; O1-NEXT: movl $42, %ebx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB204_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 -; O1-NEXT: .Lpcsection358: +; O1-NEXT: .Lpcsection403: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection359: +; O1-NEXT: .Lpcsection404: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection360: +; O1-NEXT: .Lpcsection405: ; O1-NEXT: jne .LBB204_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -10574,20 +10709,20 @@ define void @atomic128_store_monotonic(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection355: +; O2-NEXT: .Lpcsection400: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection356: +; O2-NEXT: .Lpcsection401: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection357: +; O2-NEXT: .Lpcsection402: ; O2-NEXT: movl $42, %ebx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB204_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 -; O2-NEXT: .Lpcsection358: +; O2-NEXT: .Lpcsection403: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection359: +; O2-NEXT: .Lpcsection404: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection360: +; O2-NEXT: .Lpcsection405: ; O2-NEXT: jne .LBB204_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -10601,20 +10736,20 @@ define void @atomic128_store_monotonic(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection355: +; O3-NEXT: .Lpcsection400: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection356: +; O3-NEXT: .Lpcsection401: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection357: +; O3-NEXT: .Lpcsection402: ; O3-NEXT: movl $42, %ebx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB204_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 -; O3-NEXT: .Lpcsection358: 
+; O3-NEXT: .Lpcsection403: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection359: +; O3-NEXT: .Lpcsection404: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection360: +; O3-NEXT: .Lpcsection405: ; O3-NEXT: jne .LBB204_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -10674,20 +10809,20 @@ define void @atomic128_store_release(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection361: +; O1-NEXT: .Lpcsection406: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection362: +; O1-NEXT: .Lpcsection407: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection363: +; O1-NEXT: .Lpcsection408: ; O1-NEXT: movl $42, %ebx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB205_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 -; O1-NEXT: .Lpcsection364: +; O1-NEXT: .Lpcsection409: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection365: +; O1-NEXT: .Lpcsection410: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection366: +; O1-NEXT: .Lpcsection411: ; O1-NEXT: jne .LBB205_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -10701,20 +10836,20 @@ define void @atomic128_store_release(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection361: +; O2-NEXT: .Lpcsection406: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection362: +; O2-NEXT: .Lpcsection407: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection363: +; O2-NEXT: .Lpcsection408: ; O2-NEXT: movl $42, %ebx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB205_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 -; O2-NEXT: .Lpcsection364: +; O2-NEXT: .Lpcsection409: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection365: +; O2-NEXT: .Lpcsection410: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection366: +; O2-NEXT: .Lpcsection411: ; O2-NEXT: jne .LBB205_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: 
movq $1, foo(%rip) @@ -10728,20 +10863,20 @@ define void @atomic128_store_release(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection361: +; O3-NEXT: .Lpcsection406: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection362: +; O3-NEXT: .Lpcsection407: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection363: +; O3-NEXT: .Lpcsection408: ; O3-NEXT: movl $42, %ebx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB205_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 -; O3-NEXT: .Lpcsection364: +; O3-NEXT: .Lpcsection409: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection365: +; O3-NEXT: .Lpcsection410: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection366: +; O3-NEXT: .Lpcsection411: ; O3-NEXT: jne .LBB205_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -10801,20 +10936,20 @@ define void @atomic128_store_seq_cst(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection367: +; O1-NEXT: .Lpcsection412: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection368: +; O1-NEXT: .Lpcsection413: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection369: +; O1-NEXT: .Lpcsection414: ; O1-NEXT: movl $42, %ebx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB206_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 -; O1-NEXT: .Lpcsection370: +; O1-NEXT: .Lpcsection415: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection371: +; O1-NEXT: .Lpcsection416: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection372: +; O1-NEXT: .Lpcsection417: ; O1-NEXT: jne .LBB206_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -10828,20 +10963,20 @@ define void @atomic128_store_seq_cst(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection367: +; O2-NEXT: .Lpcsection412: ; O2-NEXT: movq (%rdi), %rax 
-; O2-NEXT: .Lpcsection368: +; O2-NEXT: .Lpcsection413: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection369: +; O2-NEXT: .Lpcsection414: ; O2-NEXT: movl $42, %ebx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB206_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 -; O2-NEXT: .Lpcsection370: +; O2-NEXT: .Lpcsection415: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection371: +; O2-NEXT: .Lpcsection416: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection372: +; O2-NEXT: .Lpcsection417: ; O2-NEXT: jne .LBB206_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -10855,20 +10990,20 @@ define void @atomic128_store_seq_cst(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection367: +; O3-NEXT: .Lpcsection412: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection368: +; O3-NEXT: .Lpcsection413: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection369: +; O3-NEXT: .Lpcsection414: ; O3-NEXT: movl $42, %ebx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB206_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 -; O3-NEXT: .Lpcsection370: +; O3-NEXT: .Lpcsection415: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection371: +; O3-NEXT: .Lpcsection416: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection372: +; O3-NEXT: .Lpcsection417: ; O3-NEXT: jne .LBB206_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -10894,7 +11029,7 @@ define void @atomic128_store_seq_cst_ptr_ty(ptr %a, ptr %v) { ; O1-LABEL: atomic128_store_seq_cst_ptr_ty: ; O1: # %bb.0: # %entry ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection373: +; O1-NEXT: .Lpcsection418: ; O1-NEXT: xchgq %rsi, (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: retq @@ -10902,7 +11037,7 @@ define void @atomic128_store_seq_cst_ptr_ty(ptr %a, ptr %v) { ; O2-LABEL: atomic128_store_seq_cst_ptr_ty: ; O2: # %bb.0: # %entry ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection373: 
+; O2-NEXT: .Lpcsection418: ; O2-NEXT: xchgq %rsi, (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: retq @@ -10910,7 +11045,7 @@ define void @atomic128_store_seq_cst_ptr_ty(ptr %a, ptr %v) { ; O3-LABEL: atomic128_store_seq_cst_ptr_ty: ; O3: # %bb.0: # %entry ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection373: +; O3-NEXT: .Lpcsection418: ; O3-NEXT: xchgq %rsi, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq @@ -10967,20 +11102,20 @@ define void @atomic128_xchg_monotonic(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection374: +; O1-NEXT: .Lpcsection419: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection375: +; O1-NEXT: .Lpcsection420: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection376: +; O1-NEXT: .Lpcsection421: ; O1-NEXT: movl $42, %ebx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB208_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 -; O1-NEXT: .Lpcsection377: +; O1-NEXT: .Lpcsection422: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection378: +; O1-NEXT: .Lpcsection423: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection379: +; O1-NEXT: .Lpcsection424: ; O1-NEXT: jne .LBB208_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -10994,20 +11129,20 @@ define void @atomic128_xchg_monotonic(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection374: +; O2-NEXT: .Lpcsection419: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection375: +; O2-NEXT: .Lpcsection420: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection376: +; O2-NEXT: .Lpcsection421: ; O2-NEXT: movl $42, %ebx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB208_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 -; O2-NEXT: .Lpcsection377: +; O2-NEXT: .Lpcsection422: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection378: +; O2-NEXT: .Lpcsection423: ; O2-NEXT: lock cmpxchg16b (%rdi) 
-; O2-NEXT: .Lpcsection379: +; O2-NEXT: .Lpcsection424: ; O2-NEXT: jne .LBB208_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -11021,20 +11156,20 @@ define void @atomic128_xchg_monotonic(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection374: +; O3-NEXT: .Lpcsection419: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection375: +; O3-NEXT: .Lpcsection420: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection376: +; O3-NEXT: .Lpcsection421: ; O3-NEXT: movl $42, %ebx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB208_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 -; O3-NEXT: .Lpcsection377: +; O3-NEXT: .Lpcsection422: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection378: +; O3-NEXT: .Lpcsection423: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection379: +; O3-NEXT: .Lpcsection424: ; O3-NEXT: jne .LBB208_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -11094,22 +11229,22 @@ define void @atomic128_add_monotonic(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection380: +; O1-NEXT: .Lpcsection425: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection381: +; O1-NEXT: .Lpcsection426: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB209_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection382: +; O1-NEXT: .Lpcsection427: ; O1-NEXT: addq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection383: +; O1-NEXT: .Lpcsection428: ; O1-NEXT: adcq $0, %rcx -; O1-NEXT: .Lpcsection384: +; O1-NEXT: .Lpcsection429: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection385: +; O1-NEXT: .Lpcsection430: ; O1-NEXT: jne .LBB209_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -11123,22 +11258,22 @@ define void @atomic128_add_monotonic(ptr %a) { ; 
O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection380: +; O2-NEXT: .Lpcsection425: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection381: +; O2-NEXT: .Lpcsection426: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB209_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection382: +; O2-NEXT: .Lpcsection427: ; O2-NEXT: addq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection383: +; O2-NEXT: .Lpcsection428: ; O2-NEXT: adcq $0, %rcx -; O2-NEXT: .Lpcsection384: +; O2-NEXT: .Lpcsection429: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection385: +; O2-NEXT: .Lpcsection430: ; O2-NEXT: jne .LBB209_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -11152,22 +11287,22 @@ define void @atomic128_add_monotonic(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection380: +; O3-NEXT: .Lpcsection425: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection381: +; O3-NEXT: .Lpcsection426: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB209_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection382: +; O3-NEXT: .Lpcsection427: ; O3-NEXT: addq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection383: +; O3-NEXT: .Lpcsection428: ; O3-NEXT: adcq $0, %rcx -; O3-NEXT: .Lpcsection384: +; O3-NEXT: .Lpcsection429: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection385: +; O3-NEXT: .Lpcsection430: ; O3-NEXT: jne .LBB209_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -11227,22 +11362,22 @@ define void @atomic128_sub_monotonic(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection386: +; O1-NEXT: .Lpcsection431: ; O1-NEXT: movq 
(%rdi), %rax -; O1-NEXT: .Lpcsection387: +; O1-NEXT: .Lpcsection432: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB210_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection388: +; O1-NEXT: .Lpcsection433: ; O1-NEXT: addq $-42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection389: +; O1-NEXT: .Lpcsection434: ; O1-NEXT: adcq $-1, %rcx -; O1-NEXT: .Lpcsection390: +; O1-NEXT: .Lpcsection435: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection391: +; O1-NEXT: .Lpcsection436: ; O1-NEXT: jne .LBB210_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -11256,22 +11391,22 @@ define void @atomic128_sub_monotonic(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection386: +; O2-NEXT: .Lpcsection431: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection387: +; O2-NEXT: .Lpcsection432: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB210_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection388: +; O2-NEXT: .Lpcsection433: ; O2-NEXT: addq $-42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection389: +; O2-NEXT: .Lpcsection434: ; O2-NEXT: adcq $-1, %rcx -; O2-NEXT: .Lpcsection390: +; O2-NEXT: .Lpcsection435: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection391: +; O2-NEXT: .Lpcsection436: ; O2-NEXT: jne .LBB210_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -11285,22 +11420,22 @@ define void @atomic128_sub_monotonic(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection386: +; O3-NEXT: .Lpcsection431: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection387: +; O3-NEXT: .Lpcsection432: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB210_1: # %atomicrmw.start 
; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection388: +; O3-NEXT: .Lpcsection433: ; O3-NEXT: addq $-42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection389: +; O3-NEXT: .Lpcsection434: ; O3-NEXT: adcq $-1, %rcx -; O3-NEXT: .Lpcsection390: +; O3-NEXT: .Lpcsection435: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection391: +; O3-NEXT: .Lpcsection436: ; O3-NEXT: jne .LBB210_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -11362,21 +11497,21 @@ define void @atomic128_and_monotonic(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection392: +; O1-NEXT: .Lpcsection437: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection393: +; O1-NEXT: .Lpcsection438: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB211_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ebx -; O1-NEXT: .Lpcsection394: +; O1-NEXT: .Lpcsection439: ; O1-NEXT: andl $42, %ebx -; O1-NEXT: .Lpcsection395: +; O1-NEXT: .Lpcsection440: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection396: +; O1-NEXT: .Lpcsection441: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection397: +; O1-NEXT: .Lpcsection442: ; O1-NEXT: jne .LBB211_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -11390,21 +11525,21 @@ define void @atomic128_and_monotonic(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection392: +; O2-NEXT: .Lpcsection437: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection393: +; O2-NEXT: .Lpcsection438: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB211_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ebx -; O2-NEXT: .Lpcsection394: +; O2-NEXT: .Lpcsection439: ; O2-NEXT: andl $42, %ebx -; O2-NEXT: .Lpcsection395: +; 
O2-NEXT: .Lpcsection440: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection396: +; O2-NEXT: .Lpcsection441: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection397: +; O2-NEXT: .Lpcsection442: ; O2-NEXT: jne .LBB211_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -11418,21 +11553,21 @@ define void @atomic128_and_monotonic(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection392: +; O3-NEXT: .Lpcsection437: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection393: +; O3-NEXT: .Lpcsection438: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB211_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ebx -; O3-NEXT: .Lpcsection394: +; O3-NEXT: .Lpcsection439: ; O3-NEXT: andl $42, %ebx -; O3-NEXT: .Lpcsection395: +; O3-NEXT: .Lpcsection440: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection396: +; O3-NEXT: .Lpcsection441: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection397: +; O3-NEXT: .Lpcsection442: ; O3-NEXT: jne .LBB211_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -11490,20 +11625,20 @@ define void @atomic128_or_monotonic(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection398: +; O1-NEXT: .Lpcsection443: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection399: +; O1-NEXT: .Lpcsection444: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB212_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection400: +; O1-NEXT: .Lpcsection445: ; O1-NEXT: orq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection401: +; O1-NEXT: .Lpcsection446: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection402: +; O1-NEXT: .Lpcsection447: ; O1-NEXT: jne .LBB212_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, 
foo(%rip) @@ -11517,20 +11652,20 @@ define void @atomic128_or_monotonic(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection398: +; O2-NEXT: .Lpcsection443: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection399: +; O2-NEXT: .Lpcsection444: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB212_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection400: +; O2-NEXT: .Lpcsection445: ; O2-NEXT: orq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection401: +; O2-NEXT: .Lpcsection446: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection402: +; O2-NEXT: .Lpcsection447: ; O2-NEXT: jne .LBB212_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -11544,20 +11679,20 @@ define void @atomic128_or_monotonic(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection398: +; O3-NEXT: .Lpcsection443: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection399: +; O3-NEXT: .Lpcsection444: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB212_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection400: +; O3-NEXT: .Lpcsection445: ; O3-NEXT: orq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection401: +; O3-NEXT: .Lpcsection446: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection402: +; O3-NEXT: .Lpcsection447: ; O3-NEXT: jne .LBB212_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -11615,20 +11750,20 @@ define void @atomic128_xor_monotonic(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection403: +; O1-NEXT: .Lpcsection448: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection404: +; O1-NEXT: .Lpcsection449: ; O1-NEXT: 
movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB213_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection405: +; O1-NEXT: .Lpcsection450: ; O1-NEXT: xorq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection406: +; O1-NEXT: .Lpcsection451: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection407: +; O1-NEXT: .Lpcsection452: ; O1-NEXT: jne .LBB213_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -11642,20 +11777,20 @@ define void @atomic128_xor_monotonic(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection403: +; O2-NEXT: .Lpcsection448: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection404: +; O2-NEXT: .Lpcsection449: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB213_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection405: +; O2-NEXT: .Lpcsection450: ; O2-NEXT: xorq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection406: +; O2-NEXT: .Lpcsection451: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection407: +; O2-NEXT: .Lpcsection452: ; O2-NEXT: jne .LBB213_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -11669,20 +11804,20 @@ define void @atomic128_xor_monotonic(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection403: +; O3-NEXT: .Lpcsection448: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection404: +; O3-NEXT: .Lpcsection449: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB213_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection405: +; O3-NEXT: .Lpcsection450: ; O3-NEXT: xorq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection406: +; O3-NEXT: .Lpcsection451: ; 
O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection407: +; O3-NEXT: .Lpcsection452: ; O3-NEXT: jne .LBB213_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -11746,23 +11881,23 @@ define void @atomic128_nand_monotonic(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection408: +; O1-NEXT: .Lpcsection453: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection409: +; O1-NEXT: .Lpcsection454: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection410: +; O1-NEXT: .Lpcsection455: ; O1-NEXT: movq $-1, %rcx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB214_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ebx -; O1-NEXT: .Lpcsection411: +; O1-NEXT: .Lpcsection456: ; O1-NEXT: notl %ebx -; O1-NEXT: .Lpcsection412: +; O1-NEXT: .Lpcsection457: ; O1-NEXT: orq $-43, %rbx -; O1-NEXT: .Lpcsection413: +; O1-NEXT: .Lpcsection458: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection414: +; O1-NEXT: .Lpcsection459: ; O1-NEXT: jne .LBB214_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -11776,23 +11911,23 @@ define void @atomic128_nand_monotonic(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection408: +; O2-NEXT: .Lpcsection453: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection409: +; O2-NEXT: .Lpcsection454: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection410: +; O2-NEXT: .Lpcsection455: ; O2-NEXT: movq $-1, %rcx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB214_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ebx -; O2-NEXT: .Lpcsection411: +; O2-NEXT: .Lpcsection456: ; O2-NEXT: notl %ebx -; O2-NEXT: .Lpcsection412: +; O2-NEXT: .Lpcsection457: ; O2-NEXT: orq $-43, %rbx -; O2-NEXT: .Lpcsection413: +; O2-NEXT: .Lpcsection458: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection414: +; 
O2-NEXT: .Lpcsection459: ; O2-NEXT: jne .LBB214_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -11806,23 +11941,23 @@ define void @atomic128_nand_monotonic(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection408: +; O3-NEXT: .Lpcsection453: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection409: +; O3-NEXT: .Lpcsection454: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection410: +; O3-NEXT: .Lpcsection455: ; O3-NEXT: movq $-1, %rcx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB214_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ebx -; O3-NEXT: .Lpcsection411: +; O3-NEXT: .Lpcsection456: ; O3-NEXT: notl %ebx -; O3-NEXT: .Lpcsection412: +; O3-NEXT: .Lpcsection457: ; O3-NEXT: orq $-43, %rbx -; O3-NEXT: .Lpcsection413: +; O3-NEXT: .Lpcsection458: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection414: +; O3-NEXT: .Lpcsection459: ; O3-NEXT: jne .LBB214_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -11882,20 +12017,20 @@ define void @atomic128_xchg_acquire(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection415: +; O1-NEXT: .Lpcsection460: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection416: +; O1-NEXT: .Lpcsection461: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection417: +; O1-NEXT: .Lpcsection462: ; O1-NEXT: movl $42, %ebx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB215_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 -; O1-NEXT: .Lpcsection418: +; O1-NEXT: .Lpcsection463: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection419: +; O1-NEXT: .Lpcsection464: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection420: +; O1-NEXT: .Lpcsection465: ; O1-NEXT: jne .LBB215_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -11909,20 +12044,20 @@ define void 
@atomic128_xchg_acquire(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection415: +; O2-NEXT: .Lpcsection460: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection416: +; O2-NEXT: .Lpcsection461: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection417: +; O2-NEXT: .Lpcsection462: ; O2-NEXT: movl $42, %ebx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB215_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 -; O2-NEXT: .Lpcsection418: +; O2-NEXT: .Lpcsection463: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection419: +; O2-NEXT: .Lpcsection464: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection420: +; O2-NEXT: .Lpcsection465: ; O2-NEXT: jne .LBB215_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -11936,20 +12071,20 @@ define void @atomic128_xchg_acquire(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection415: +; O3-NEXT: .Lpcsection460: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection416: +; O3-NEXT: .Lpcsection461: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection417: +; O3-NEXT: .Lpcsection462: ; O3-NEXT: movl $42, %ebx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB215_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 -; O3-NEXT: .Lpcsection418: +; O3-NEXT: .Lpcsection463: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection419: +; O3-NEXT: .Lpcsection464: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection420: +; O3-NEXT: .Lpcsection465: ; O3-NEXT: jne .LBB215_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -12009,22 +12144,22 @@ define void @atomic128_add_acquire(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection421: +; O1-NEXT: .Lpcsection466: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection422: +; O1-NEXT: .Lpcsection467: ; 
O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB216_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection423: +; O1-NEXT: .Lpcsection468: ; O1-NEXT: addq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection424: +; O1-NEXT: .Lpcsection469: ; O1-NEXT: adcq $0, %rcx -; O1-NEXT: .Lpcsection425: +; O1-NEXT: .Lpcsection470: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection426: +; O1-NEXT: .Lpcsection471: ; O1-NEXT: jne .LBB216_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -12038,22 +12173,22 @@ define void @atomic128_add_acquire(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection421: +; O2-NEXT: .Lpcsection466: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection422: +; O2-NEXT: .Lpcsection467: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB216_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection423: +; O2-NEXT: .Lpcsection468: ; O2-NEXT: addq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection424: +; O2-NEXT: .Lpcsection469: ; O2-NEXT: adcq $0, %rcx -; O2-NEXT: .Lpcsection425: +; O2-NEXT: .Lpcsection470: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection426: +; O2-NEXT: .Lpcsection471: ; O2-NEXT: jne .LBB216_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -12067,22 +12202,22 @@ define void @atomic128_add_acquire(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection421: +; O3-NEXT: .Lpcsection466: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection422: +; O3-NEXT: .Lpcsection467: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB216_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; 
O3-NEXT: .Lpcsection423: +; O3-NEXT: .Lpcsection468: ; O3-NEXT: addq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection424: +; O3-NEXT: .Lpcsection469: ; O3-NEXT: adcq $0, %rcx -; O3-NEXT: .Lpcsection425: +; O3-NEXT: .Lpcsection470: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection426: +; O3-NEXT: .Lpcsection471: ; O3-NEXT: jne .LBB216_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -12142,22 +12277,22 @@ define void @atomic128_sub_acquire(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection427: +; O1-NEXT: .Lpcsection472: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection428: +; O1-NEXT: .Lpcsection473: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB217_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection429: +; O1-NEXT: .Lpcsection474: ; O1-NEXT: addq $-42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection430: +; O1-NEXT: .Lpcsection475: ; O1-NEXT: adcq $-1, %rcx -; O1-NEXT: .Lpcsection431: +; O1-NEXT: .Lpcsection476: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection432: +; O1-NEXT: .Lpcsection477: ; O1-NEXT: jne .LBB217_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -12171,22 +12306,22 @@ define void @atomic128_sub_acquire(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection427: +; O2-NEXT: .Lpcsection472: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection428: +; O2-NEXT: .Lpcsection473: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB217_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection429: +; O2-NEXT: .Lpcsection474: ; O2-NEXT: addq $-42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection430: +; O2-NEXT: .Lpcsection475: ; O2-NEXT: 
adcq $-1, %rcx -; O2-NEXT: .Lpcsection431: +; O2-NEXT: .Lpcsection476: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection432: +; O2-NEXT: .Lpcsection477: ; O2-NEXT: jne .LBB217_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -12200,22 +12335,22 @@ define void @atomic128_sub_acquire(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection427: +; O3-NEXT: .Lpcsection472: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection428: +; O3-NEXT: .Lpcsection473: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB217_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection429: +; O3-NEXT: .Lpcsection474: ; O3-NEXT: addq $-42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection430: +; O3-NEXT: .Lpcsection475: ; O3-NEXT: adcq $-1, %rcx -; O3-NEXT: .Lpcsection431: +; O3-NEXT: .Lpcsection476: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection432: +; O3-NEXT: .Lpcsection477: ; O3-NEXT: jne .LBB217_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -12277,21 +12412,21 @@ define void @atomic128_and_acquire(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection433: +; O1-NEXT: .Lpcsection478: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection434: +; O1-NEXT: .Lpcsection479: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB218_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ebx -; O1-NEXT: .Lpcsection435: +; O1-NEXT: .Lpcsection480: ; O1-NEXT: andl $42, %ebx -; O1-NEXT: .Lpcsection436: +; O1-NEXT: .Lpcsection481: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection437: +; O1-NEXT: .Lpcsection482: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection438: +; O1-NEXT: .Lpcsection483: ; O1-NEXT: jne .LBB218_1 ; O1-NEXT: # 
%bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -12305,21 +12440,21 @@ define void @atomic128_and_acquire(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection433: +; O2-NEXT: .Lpcsection478: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection434: +; O2-NEXT: .Lpcsection479: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB218_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ebx -; O2-NEXT: .Lpcsection435: +; O2-NEXT: .Lpcsection480: ; O2-NEXT: andl $42, %ebx -; O2-NEXT: .Lpcsection436: +; O2-NEXT: .Lpcsection481: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection437: +; O2-NEXT: .Lpcsection482: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection438: +; O2-NEXT: .Lpcsection483: ; O2-NEXT: jne .LBB218_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -12333,21 +12468,21 @@ define void @atomic128_and_acquire(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection433: +; O3-NEXT: .Lpcsection478: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection434: +; O3-NEXT: .Lpcsection479: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB218_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ebx -; O3-NEXT: .Lpcsection435: +; O3-NEXT: .Lpcsection480: ; O3-NEXT: andl $42, %ebx -; O3-NEXT: .Lpcsection436: +; O3-NEXT: .Lpcsection481: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection437: +; O3-NEXT: .Lpcsection482: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection438: +; O3-NEXT: .Lpcsection483: ; O3-NEXT: jne .LBB218_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -12405,20 +12540,20 @@ define void @atomic128_or_acquire(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; 
O1-NEXT: .Lpcsection439: +; O1-NEXT: .Lpcsection484: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection440: +; O1-NEXT: .Lpcsection485: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB219_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection441: +; O1-NEXT: .Lpcsection486: ; O1-NEXT: orq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection442: +; O1-NEXT: .Lpcsection487: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection443: +; O1-NEXT: .Lpcsection488: ; O1-NEXT: jne .LBB219_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -12432,20 +12567,20 @@ define void @atomic128_or_acquire(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection439: +; O2-NEXT: .Lpcsection484: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection440: +; O2-NEXT: .Lpcsection485: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB219_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection441: +; O2-NEXT: .Lpcsection486: ; O2-NEXT: orq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection442: +; O2-NEXT: .Lpcsection487: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection443: +; O2-NEXT: .Lpcsection488: ; O2-NEXT: jne .LBB219_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -12459,20 +12594,20 @@ define void @atomic128_or_acquire(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection439: +; O3-NEXT: .Lpcsection484: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection440: +; O3-NEXT: .Lpcsection485: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB219_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection441: +; 
O3-NEXT: .Lpcsection486: ; O3-NEXT: orq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection442: +; O3-NEXT: .Lpcsection487: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection443: +; O3-NEXT: .Lpcsection488: ; O3-NEXT: jne .LBB219_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -12530,20 +12665,20 @@ define void @atomic128_xor_acquire(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection444: +; O1-NEXT: .Lpcsection489: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection445: +; O1-NEXT: .Lpcsection490: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB220_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection446: +; O1-NEXT: .Lpcsection491: ; O1-NEXT: xorq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection447: +; O1-NEXT: .Lpcsection492: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection448: +; O1-NEXT: .Lpcsection493: ; O1-NEXT: jne .LBB220_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -12557,20 +12692,20 @@ define void @atomic128_xor_acquire(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection444: +; O2-NEXT: .Lpcsection489: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection445: +; O2-NEXT: .Lpcsection490: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB220_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection446: +; O2-NEXT: .Lpcsection491: ; O2-NEXT: xorq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection447: +; O2-NEXT: .Lpcsection492: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection448: +; O2-NEXT: .Lpcsection493: ; O2-NEXT: jne .LBB220_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -12584,20 +12719,20 
@@ define void @atomic128_xor_acquire(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection444: +; O3-NEXT: .Lpcsection489: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection445: +; O3-NEXT: .Lpcsection490: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB220_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection446: +; O3-NEXT: .Lpcsection491: ; O3-NEXT: xorq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection447: +; O3-NEXT: .Lpcsection492: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection448: +; O3-NEXT: .Lpcsection493: ; O3-NEXT: jne .LBB220_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -12661,23 +12796,23 @@ define void @atomic128_nand_acquire(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection449: +; O1-NEXT: .Lpcsection494: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection450: +; O1-NEXT: .Lpcsection495: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection451: +; O1-NEXT: .Lpcsection496: ; O1-NEXT: movq $-1, %rcx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB221_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ebx -; O1-NEXT: .Lpcsection452: +; O1-NEXT: .Lpcsection497: ; O1-NEXT: notl %ebx -; O1-NEXT: .Lpcsection453: +; O1-NEXT: .Lpcsection498: ; O1-NEXT: orq $-43, %rbx -; O1-NEXT: .Lpcsection454: +; O1-NEXT: .Lpcsection499: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection455: +; O1-NEXT: .Lpcsection500: ; O1-NEXT: jne .LBB221_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -12691,23 +12826,23 @@ define void @atomic128_nand_acquire(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection449: +; O2-NEXT: .Lpcsection494: 
; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection450: +; O2-NEXT: .Lpcsection495: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection451: +; O2-NEXT: .Lpcsection496: ; O2-NEXT: movq $-1, %rcx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB221_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ebx -; O2-NEXT: .Lpcsection452: +; O2-NEXT: .Lpcsection497: ; O2-NEXT: notl %ebx -; O2-NEXT: .Lpcsection453: +; O2-NEXT: .Lpcsection498: ; O2-NEXT: orq $-43, %rbx -; O2-NEXT: .Lpcsection454: +; O2-NEXT: .Lpcsection499: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection455: +; O2-NEXT: .Lpcsection500: ; O2-NEXT: jne .LBB221_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -12721,23 +12856,23 @@ define void @atomic128_nand_acquire(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection449: +; O3-NEXT: .Lpcsection494: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection450: +; O3-NEXT: .Lpcsection495: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection451: +; O3-NEXT: .Lpcsection496: ; O3-NEXT: movq $-1, %rcx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB221_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ebx -; O3-NEXT: .Lpcsection452: +; O3-NEXT: .Lpcsection497: ; O3-NEXT: notl %ebx -; O3-NEXT: .Lpcsection453: +; O3-NEXT: .Lpcsection498: ; O3-NEXT: orq $-43, %rbx -; O3-NEXT: .Lpcsection454: +; O3-NEXT: .Lpcsection499: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection455: +; O3-NEXT: .Lpcsection500: ; O3-NEXT: jne .LBB221_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -12797,20 +12932,20 @@ define void @atomic128_xchg_release(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection456: +; O1-NEXT: .Lpcsection501: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection457: +; O1-NEXT: 
.Lpcsection502: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection458: +; O1-NEXT: .Lpcsection503: ; O1-NEXT: movl $42, %ebx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB222_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 -; O1-NEXT: .Lpcsection459: +; O1-NEXT: .Lpcsection504: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection460: +; O1-NEXT: .Lpcsection505: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection461: +; O1-NEXT: .Lpcsection506: ; O1-NEXT: jne .LBB222_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -12824,20 +12959,20 @@ define void @atomic128_xchg_release(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection456: +; O2-NEXT: .Lpcsection501: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection457: +; O2-NEXT: .Lpcsection502: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection458: +; O2-NEXT: .Lpcsection503: ; O2-NEXT: movl $42, %ebx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB222_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 -; O2-NEXT: .Lpcsection459: +; O2-NEXT: .Lpcsection504: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection460: +; O2-NEXT: .Lpcsection505: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection461: +; O2-NEXT: .Lpcsection506: ; O2-NEXT: jne .LBB222_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -12851,20 +12986,20 @@ define void @atomic128_xchg_release(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection456: +; O3-NEXT: .Lpcsection501: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection457: +; O3-NEXT: .Lpcsection502: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection458: +; O3-NEXT: .Lpcsection503: ; O3-NEXT: movl $42, %ebx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB222_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 -; O3-NEXT: .Lpcsection459: +; 
O3-NEXT: .Lpcsection504: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection460: +; O3-NEXT: .Lpcsection505: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection461: +; O3-NEXT: .Lpcsection506: ; O3-NEXT: jne .LBB222_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -12923,22 +13058,22 @@ define void @atomic128_add_release(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection462: +; O1-NEXT: .Lpcsection507: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection463: +; O1-NEXT: .Lpcsection508: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB223_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection464: +; O1-NEXT: .Lpcsection509: ; O1-NEXT: addq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection465: +; O1-NEXT: .Lpcsection510: ; O1-NEXT: adcq $0, %rcx -; O1-NEXT: .Lpcsection466: +; O1-NEXT: .Lpcsection511: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection467: +; O1-NEXT: .Lpcsection512: ; O1-NEXT: jne .LBB223_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -12952,22 +13087,22 @@ define void @atomic128_add_release(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection462: +; O2-NEXT: .Lpcsection507: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection463: +; O2-NEXT: .Lpcsection508: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB223_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection464: +; O2-NEXT: .Lpcsection509: ; O2-NEXT: addq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection465: +; O2-NEXT: .Lpcsection510: ; O2-NEXT: adcq $0, %rcx -; O2-NEXT: .Lpcsection466: +; O2-NEXT: .Lpcsection511: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection467: +; 
O2-NEXT: .Lpcsection512: ; O2-NEXT: jne .LBB223_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -12981,22 +13116,22 @@ define void @atomic128_add_release(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection462: +; O3-NEXT: .Lpcsection507: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection463: +; O3-NEXT: .Lpcsection508: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB223_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection464: +; O3-NEXT: .Lpcsection509: ; O3-NEXT: addq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection465: +; O3-NEXT: .Lpcsection510: ; O3-NEXT: adcq $0, %rcx -; O3-NEXT: .Lpcsection466: +; O3-NEXT: .Lpcsection511: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection467: +; O3-NEXT: .Lpcsection512: ; O3-NEXT: jne .LBB223_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -13056,22 +13191,22 @@ define void @atomic128_sub_release(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection468: +; O1-NEXT: .Lpcsection513: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection469: +; O1-NEXT: .Lpcsection514: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB224_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection470: +; O1-NEXT: .Lpcsection515: ; O1-NEXT: addq $-42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection471: +; O1-NEXT: .Lpcsection516: ; O1-NEXT: adcq $-1, %rcx -; O1-NEXT: .Lpcsection472: +; O1-NEXT: .Lpcsection517: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection473: +; O1-NEXT: .Lpcsection518: ; O1-NEXT: jne .LBB224_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -13085,22 +13220,22 @@ define void 
@atomic128_sub_release(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection468: +; O2-NEXT: .Lpcsection513: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection469: +; O2-NEXT: .Lpcsection514: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB224_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection470: +; O2-NEXT: .Lpcsection515: ; O2-NEXT: addq $-42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection471: +; O2-NEXT: .Lpcsection516: ; O2-NEXT: adcq $-1, %rcx -; O2-NEXT: .Lpcsection472: +; O2-NEXT: .Lpcsection517: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection473: +; O2-NEXT: .Lpcsection518: ; O2-NEXT: jne .LBB224_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -13114,22 +13249,22 @@ define void @atomic128_sub_release(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection468: +; O3-NEXT: .Lpcsection513: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection469: +; O3-NEXT: .Lpcsection514: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB224_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection470: +; O3-NEXT: .Lpcsection515: ; O3-NEXT: addq $-42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection471: +; O3-NEXT: .Lpcsection516: ; O3-NEXT: adcq $-1, %rcx -; O3-NEXT: .Lpcsection472: +; O3-NEXT: .Lpcsection517: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection473: +; O3-NEXT: .Lpcsection518: ; O3-NEXT: jne .LBB224_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -13191,21 +13326,21 @@ define void @atomic128_and_release(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection474: +; O1-NEXT: 
.Lpcsection519: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection475: +; O1-NEXT: .Lpcsection520: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB225_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ebx -; O1-NEXT: .Lpcsection476: +; O1-NEXT: .Lpcsection521: ; O1-NEXT: andl $42, %ebx -; O1-NEXT: .Lpcsection477: +; O1-NEXT: .Lpcsection522: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection478: +; O1-NEXT: .Lpcsection523: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection479: +; O1-NEXT: .Lpcsection524: ; O1-NEXT: jne .LBB225_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -13219,21 +13354,21 @@ define void @atomic128_and_release(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection474: +; O2-NEXT: .Lpcsection519: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection475: +; O2-NEXT: .Lpcsection520: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB225_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ebx -; O2-NEXT: .Lpcsection476: +; O2-NEXT: .Lpcsection521: ; O2-NEXT: andl $42, %ebx -; O2-NEXT: .Lpcsection477: +; O2-NEXT: .Lpcsection522: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection478: +; O2-NEXT: .Lpcsection523: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection479: +; O2-NEXT: .Lpcsection524: ; O2-NEXT: jne .LBB225_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -13247,21 +13382,21 @@ define void @atomic128_and_release(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection474: +; O3-NEXT: .Lpcsection519: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection475: +; O3-NEXT: .Lpcsection520: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB225_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner 
Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ebx -; O3-NEXT: .Lpcsection476: +; O3-NEXT: .Lpcsection521: ; O3-NEXT: andl $42, %ebx -; O3-NEXT: .Lpcsection477: +; O3-NEXT: .Lpcsection522: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection478: +; O3-NEXT: .Lpcsection523: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection479: +; O3-NEXT: .Lpcsection524: ; O3-NEXT: jne .LBB225_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -13319,20 +13454,20 @@ define void @atomic128_or_release(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection480: +; O1-NEXT: .Lpcsection525: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection481: +; O1-NEXT: .Lpcsection526: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB226_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection482: +; O1-NEXT: .Lpcsection527: ; O1-NEXT: orq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection483: +; O1-NEXT: .Lpcsection528: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection484: +; O1-NEXT: .Lpcsection529: ; O1-NEXT: jne .LBB226_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -13346,20 +13481,20 @@ define void @atomic128_or_release(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection480: +; O2-NEXT: .Lpcsection525: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection481: +; O2-NEXT: .Lpcsection526: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB226_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection482: +; O2-NEXT: .Lpcsection527: ; O2-NEXT: orq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection483: +; O2-NEXT: .Lpcsection528: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection484: +; 
O2-NEXT: .Lpcsection529: ; O2-NEXT: jne .LBB226_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -13373,20 +13508,20 @@ define void @atomic128_or_release(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection480: +; O3-NEXT: .Lpcsection525: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection481: +; O3-NEXT: .Lpcsection526: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB226_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection482: +; O3-NEXT: .Lpcsection527: ; O3-NEXT: orq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection483: +; O3-NEXT: .Lpcsection528: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection484: +; O3-NEXT: .Lpcsection529: ; O3-NEXT: jne .LBB226_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -13444,20 +13579,20 @@ define void @atomic128_xor_release(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection485: +; O1-NEXT: .Lpcsection530: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection486: +; O1-NEXT: .Lpcsection531: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB227_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection487: +; O1-NEXT: .Lpcsection532: ; O1-NEXT: xorq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection488: +; O1-NEXT: .Lpcsection533: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection489: +; O1-NEXT: .Lpcsection534: ; O1-NEXT: jne .LBB227_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -13471,20 +13606,20 @@ define void @atomic128_xor_release(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection485: +; O2-NEXT: 
.Lpcsection530: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection486: +; O2-NEXT: .Lpcsection531: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB227_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection487: +; O2-NEXT: .Lpcsection532: ; O2-NEXT: xorq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection488: +; O2-NEXT: .Lpcsection533: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection489: +; O2-NEXT: .Lpcsection534: ; O2-NEXT: jne .LBB227_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -13498,20 +13633,20 @@ define void @atomic128_xor_release(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection485: +; O3-NEXT: .Lpcsection530: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection486: +; O3-NEXT: .Lpcsection531: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB227_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection487: +; O3-NEXT: .Lpcsection532: ; O3-NEXT: xorq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection488: +; O3-NEXT: .Lpcsection533: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection489: +; O3-NEXT: .Lpcsection534: ; O3-NEXT: jne .LBB227_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -13575,23 +13710,23 @@ define void @atomic128_nand_release(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection490: +; O1-NEXT: .Lpcsection535: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection491: +; O1-NEXT: .Lpcsection536: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection492: +; O1-NEXT: .Lpcsection537: ; O1-NEXT: movq $-1, %rcx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB228_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; 
O1-NEXT: movl %eax, %ebx -; O1-NEXT: .Lpcsection493: +; O1-NEXT: .Lpcsection538: ; O1-NEXT: notl %ebx -; O1-NEXT: .Lpcsection494: +; O1-NEXT: .Lpcsection539: ; O1-NEXT: orq $-43, %rbx -; O1-NEXT: .Lpcsection495: +; O1-NEXT: .Lpcsection540: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection496: +; O1-NEXT: .Lpcsection541: ; O1-NEXT: jne .LBB228_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -13605,23 +13740,23 @@ define void @atomic128_nand_release(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection490: +; O2-NEXT: .Lpcsection535: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection491: +; O2-NEXT: .Lpcsection536: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection492: +; O2-NEXT: .Lpcsection537: ; O2-NEXT: movq $-1, %rcx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB228_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ebx -; O2-NEXT: .Lpcsection493: +; O2-NEXT: .Lpcsection538: ; O2-NEXT: notl %ebx -; O2-NEXT: .Lpcsection494: +; O2-NEXT: .Lpcsection539: ; O2-NEXT: orq $-43, %rbx -; O2-NEXT: .Lpcsection495: +; O2-NEXT: .Lpcsection540: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection496: +; O2-NEXT: .Lpcsection541: ; O2-NEXT: jne .LBB228_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -13635,23 +13770,23 @@ define void @atomic128_nand_release(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection490: +; O3-NEXT: .Lpcsection535: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection491: +; O3-NEXT: .Lpcsection536: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection492: +; O3-NEXT: .Lpcsection537: ; O3-NEXT: movq $-1, %rcx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB228_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ebx -; O3-NEXT: .Lpcsection493: +; O3-NEXT: 
.Lpcsection538: ; O3-NEXT: notl %ebx -; O3-NEXT: .Lpcsection494: +; O3-NEXT: .Lpcsection539: ; O3-NEXT: orq $-43, %rbx -; O3-NEXT: .Lpcsection495: +; O3-NEXT: .Lpcsection540: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection496: +; O3-NEXT: .Lpcsection541: ; O3-NEXT: jne .LBB228_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -13711,20 +13846,20 @@ define void @atomic128_xchg_acq_rel(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection497: +; O1-NEXT: .Lpcsection542: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection498: +; O1-NEXT: .Lpcsection543: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection499: +; O1-NEXT: .Lpcsection544: ; O1-NEXT: movl $42, %ebx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB229_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 -; O1-NEXT: .Lpcsection500: +; O1-NEXT: .Lpcsection545: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection501: +; O1-NEXT: .Lpcsection546: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection502: +; O1-NEXT: .Lpcsection547: ; O1-NEXT: jne .LBB229_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -13738,20 +13873,20 @@ define void @atomic128_xchg_acq_rel(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection497: +; O2-NEXT: .Lpcsection542: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection498: +; O2-NEXT: .Lpcsection543: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection499: +; O2-NEXT: .Lpcsection544: ; O2-NEXT: movl $42, %ebx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB229_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 -; O2-NEXT: .Lpcsection500: +; O2-NEXT: .Lpcsection545: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection501: +; O2-NEXT: .Lpcsection546: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection502: +; O2-NEXT: .Lpcsection547: ; O2-NEXT: 
jne .LBB229_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -13765,20 +13900,20 @@ define void @atomic128_xchg_acq_rel(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection497: +; O3-NEXT: .Lpcsection542: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection498: +; O3-NEXT: .Lpcsection543: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection499: +; O3-NEXT: .Lpcsection544: ; O3-NEXT: movl $42, %ebx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB229_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 -; O3-NEXT: .Lpcsection500: +; O3-NEXT: .Lpcsection545: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection501: +; O3-NEXT: .Lpcsection546: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection502: +; O3-NEXT: .Lpcsection547: ; O3-NEXT: jne .LBB229_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -13838,22 +13973,22 @@ define void @atomic128_add_acq_rel(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection503: +; O1-NEXT: .Lpcsection548: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection504: +; O1-NEXT: .Lpcsection549: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB230_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection505: +; O1-NEXT: .Lpcsection550: ; O1-NEXT: addq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection506: +; O1-NEXT: .Lpcsection551: ; O1-NEXT: adcq $0, %rcx -; O1-NEXT: .Lpcsection507: +; O1-NEXT: .Lpcsection552: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection508: +; O1-NEXT: .Lpcsection553: ; O1-NEXT: jne .LBB230_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -13867,22 +14002,22 @@ define void @atomic128_add_acq_rel(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; 
O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection503: +; O2-NEXT: .Lpcsection548: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection504: +; O2-NEXT: .Lpcsection549: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB230_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection505: +; O2-NEXT: .Lpcsection550: ; O2-NEXT: addq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection506: +; O2-NEXT: .Lpcsection551: ; O2-NEXT: adcq $0, %rcx -; O2-NEXT: .Lpcsection507: +; O2-NEXT: .Lpcsection552: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection508: +; O2-NEXT: .Lpcsection553: ; O2-NEXT: jne .LBB230_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -13896,22 +14031,22 @@ define void @atomic128_add_acq_rel(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection503: +; O3-NEXT: .Lpcsection548: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection504: +; O3-NEXT: .Lpcsection549: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB230_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection505: +; O3-NEXT: .Lpcsection550: ; O3-NEXT: addq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection506: +; O3-NEXT: .Lpcsection551: ; O3-NEXT: adcq $0, %rcx -; O3-NEXT: .Lpcsection507: +; O3-NEXT: .Lpcsection552: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection508: +; O3-NEXT: .Lpcsection553: ; O3-NEXT: jne .LBB230_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -13971,22 +14106,22 @@ define void @atomic128_sub_acq_rel(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection509: +; O1-NEXT: .Lpcsection554: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection510: +; O1-NEXT: .Lpcsection555: ; 
O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB231_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection511: +; O1-NEXT: .Lpcsection556: ; O1-NEXT: addq $-42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection512: +; O1-NEXT: .Lpcsection557: ; O1-NEXT: adcq $-1, %rcx -; O1-NEXT: .Lpcsection513: +; O1-NEXT: .Lpcsection558: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection514: +; O1-NEXT: .Lpcsection559: ; O1-NEXT: jne .LBB231_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -14000,22 +14135,22 @@ define void @atomic128_sub_acq_rel(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection509: +; O2-NEXT: .Lpcsection554: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection510: +; O2-NEXT: .Lpcsection555: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB231_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection511: +; O2-NEXT: .Lpcsection556: ; O2-NEXT: addq $-42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection512: +; O2-NEXT: .Lpcsection557: ; O2-NEXT: adcq $-1, %rcx -; O2-NEXT: .Lpcsection513: +; O2-NEXT: .Lpcsection558: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection514: +; O2-NEXT: .Lpcsection559: ; O2-NEXT: jne .LBB231_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -14029,22 +14164,22 @@ define void @atomic128_sub_acq_rel(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection509: +; O3-NEXT: .Lpcsection554: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection510: +; O3-NEXT: .Lpcsection555: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB231_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx 
-; O3-NEXT: .Lpcsection511: +; O3-NEXT: .Lpcsection556: ; O3-NEXT: addq $-42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection512: +; O3-NEXT: .Lpcsection557: ; O3-NEXT: adcq $-1, %rcx -; O3-NEXT: .Lpcsection513: +; O3-NEXT: .Lpcsection558: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection514: +; O3-NEXT: .Lpcsection559: ; O3-NEXT: jne .LBB231_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -14106,21 +14241,21 @@ define void @atomic128_and_acq_rel(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection515: +; O1-NEXT: .Lpcsection560: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection516: +; O1-NEXT: .Lpcsection561: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB232_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ebx -; O1-NEXT: .Lpcsection517: +; O1-NEXT: .Lpcsection562: ; O1-NEXT: andl $42, %ebx -; O1-NEXT: .Lpcsection518: +; O1-NEXT: .Lpcsection563: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection519: +; O1-NEXT: .Lpcsection564: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection520: +; O1-NEXT: .Lpcsection565: ; O1-NEXT: jne .LBB232_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -14134,21 +14269,21 @@ define void @atomic128_and_acq_rel(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection515: +; O2-NEXT: .Lpcsection560: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection516: +; O2-NEXT: .Lpcsection561: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB232_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ebx -; O2-NEXT: .Lpcsection517: +; O2-NEXT: .Lpcsection562: ; O2-NEXT: andl $42, %ebx -; O2-NEXT: .Lpcsection518: +; O2-NEXT: .Lpcsection563: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection519: +; 
O2-NEXT: .Lpcsection564: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection520: +; O2-NEXT: .Lpcsection565: ; O2-NEXT: jne .LBB232_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -14162,21 +14297,21 @@ define void @atomic128_and_acq_rel(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection515: +; O3-NEXT: .Lpcsection560: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection516: +; O3-NEXT: .Lpcsection561: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB232_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ebx -; O3-NEXT: .Lpcsection517: +; O3-NEXT: .Lpcsection562: ; O3-NEXT: andl $42, %ebx -; O3-NEXT: .Lpcsection518: +; O3-NEXT: .Lpcsection563: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection519: +; O3-NEXT: .Lpcsection564: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection520: +; O3-NEXT: .Lpcsection565: ; O3-NEXT: jne .LBB232_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -14234,20 +14369,20 @@ define void @atomic128_or_acq_rel(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection521: +; O1-NEXT: .Lpcsection566: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection522: +; O1-NEXT: .Lpcsection567: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB233_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection523: +; O1-NEXT: .Lpcsection568: ; O1-NEXT: orq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection524: +; O1-NEXT: .Lpcsection569: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection525: +; O1-NEXT: .Lpcsection570: ; O1-NEXT: jne .LBB233_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -14261,20 +14396,20 @@ define void @atomic128_or_acq_rel(ptr %a) { ; 
O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection521: +; O2-NEXT: .Lpcsection566: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection522: +; O2-NEXT: .Lpcsection567: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB233_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection523: +; O2-NEXT: .Lpcsection568: ; O2-NEXT: orq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection524: +; O2-NEXT: .Lpcsection569: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection525: +; O2-NEXT: .Lpcsection570: ; O2-NEXT: jne .LBB233_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -14288,20 +14423,20 @@ define void @atomic128_or_acq_rel(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection521: +; O3-NEXT: .Lpcsection566: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection522: +; O3-NEXT: .Lpcsection567: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB233_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection523: +; O3-NEXT: .Lpcsection568: ; O3-NEXT: orq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection524: +; O3-NEXT: .Lpcsection569: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection525: +; O3-NEXT: .Lpcsection570: ; O3-NEXT: jne .LBB233_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -14359,20 +14494,20 @@ define void @atomic128_xor_acq_rel(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection526: +; O1-NEXT: .Lpcsection571: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection527: +; O1-NEXT: .Lpcsection572: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB234_1: # %atomicrmw.start ; 
O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection528: +; O1-NEXT: .Lpcsection573: ; O1-NEXT: xorq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection529: +; O1-NEXT: .Lpcsection574: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection530: +; O1-NEXT: .Lpcsection575: ; O1-NEXT: jne .LBB234_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -14386,20 +14521,20 @@ define void @atomic128_xor_acq_rel(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection526: +; O2-NEXT: .Lpcsection571: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection527: +; O2-NEXT: .Lpcsection572: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB234_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection528: +; O2-NEXT: .Lpcsection573: ; O2-NEXT: xorq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection529: +; O2-NEXT: .Lpcsection574: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection530: +; O2-NEXT: .Lpcsection575: ; O2-NEXT: jne .LBB234_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -14413,20 +14548,20 @@ define void @atomic128_xor_acq_rel(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection526: +; O3-NEXT: .Lpcsection571: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection527: +; O3-NEXT: .Lpcsection572: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB234_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection528: +; O3-NEXT: .Lpcsection573: ; O3-NEXT: xorq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection529: +; O3-NEXT: .Lpcsection574: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection530: +; O3-NEXT: .Lpcsection575: ; O3-NEXT: 
jne .LBB234_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -14490,23 +14625,23 @@ define void @atomic128_nand_acq_rel(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection531: +; O1-NEXT: .Lpcsection576: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection532: +; O1-NEXT: .Lpcsection577: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection533: +; O1-NEXT: .Lpcsection578: ; O1-NEXT: movq $-1, %rcx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB235_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ebx -; O1-NEXT: .Lpcsection534: +; O1-NEXT: .Lpcsection579: ; O1-NEXT: notl %ebx -; O1-NEXT: .Lpcsection535: +; O1-NEXT: .Lpcsection580: ; O1-NEXT: orq $-43, %rbx -; O1-NEXT: .Lpcsection536: +; O1-NEXT: .Lpcsection581: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection537: +; O1-NEXT: .Lpcsection582: ; O1-NEXT: jne .LBB235_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -14520,23 +14655,23 @@ define void @atomic128_nand_acq_rel(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection531: +; O2-NEXT: .Lpcsection576: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection532: +; O2-NEXT: .Lpcsection577: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection533: +; O2-NEXT: .Lpcsection578: ; O2-NEXT: movq $-1, %rcx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB235_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ebx -; O2-NEXT: .Lpcsection534: +; O2-NEXT: .Lpcsection579: ; O2-NEXT: notl %ebx -; O2-NEXT: .Lpcsection535: +; O2-NEXT: .Lpcsection580: ; O2-NEXT: orq $-43, %rbx -; O2-NEXT: .Lpcsection536: +; O2-NEXT: .Lpcsection581: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection537: +; O2-NEXT: .Lpcsection582: ; O2-NEXT: jne .LBB235_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, 
foo(%rip) @@ -14550,23 +14685,23 @@ define void @atomic128_nand_acq_rel(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection531: +; O3-NEXT: .Lpcsection576: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection532: +; O3-NEXT: .Lpcsection577: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection533: +; O3-NEXT: .Lpcsection578: ; O3-NEXT: movq $-1, %rcx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB235_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ebx -; O3-NEXT: .Lpcsection534: +; O3-NEXT: .Lpcsection579: ; O3-NEXT: notl %ebx -; O3-NEXT: .Lpcsection535: +; O3-NEXT: .Lpcsection580: ; O3-NEXT: orq $-43, %rbx -; O3-NEXT: .Lpcsection536: +; O3-NEXT: .Lpcsection581: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection537: +; O3-NEXT: .Lpcsection582: ; O3-NEXT: jne .LBB235_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -14626,20 +14761,20 @@ define void @atomic128_xchg_seq_cst(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection538: +; O1-NEXT: .Lpcsection583: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection539: +; O1-NEXT: .Lpcsection584: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection540: +; O1-NEXT: .Lpcsection585: ; O1-NEXT: movl $42, %ebx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB236_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 -; O1-NEXT: .Lpcsection541: +; O1-NEXT: .Lpcsection586: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection542: +; O1-NEXT: .Lpcsection587: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection543: +; O1-NEXT: .Lpcsection588: ; O1-NEXT: jne .LBB236_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -14653,20 +14788,20 @@ define void @atomic128_xchg_seq_cst(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq 
foo(%rip), %rax -; O2-NEXT: .Lpcsection538: +; O2-NEXT: .Lpcsection583: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection539: +; O2-NEXT: .Lpcsection584: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection540: +; O2-NEXT: .Lpcsection585: ; O2-NEXT: movl $42, %ebx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB236_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 -; O2-NEXT: .Lpcsection541: +; O2-NEXT: .Lpcsection586: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection542: +; O2-NEXT: .Lpcsection587: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection543: +; O2-NEXT: .Lpcsection588: ; O2-NEXT: jne .LBB236_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -14680,20 +14815,20 @@ define void @atomic128_xchg_seq_cst(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection538: +; O3-NEXT: .Lpcsection583: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection539: +; O3-NEXT: .Lpcsection584: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection540: +; O3-NEXT: .Lpcsection585: ; O3-NEXT: movl $42, %ebx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB236_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 -; O3-NEXT: .Lpcsection541: +; O3-NEXT: .Lpcsection586: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection542: +; O3-NEXT: .Lpcsection587: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection543: +; O3-NEXT: .Lpcsection588: ; O3-NEXT: jne .LBB236_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -14753,22 +14888,22 @@ define void @atomic128_add_seq_cst(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection544: +; O1-NEXT: .Lpcsection589: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection545: +; O1-NEXT: .Lpcsection590: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB237_1: # %atomicrmw.start ; O1-NEXT: # =>This 
Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection546: +; O1-NEXT: .Lpcsection591: ; O1-NEXT: addq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection547: +; O1-NEXT: .Lpcsection592: ; O1-NEXT: adcq $0, %rcx -; O1-NEXT: .Lpcsection548: +; O1-NEXT: .Lpcsection593: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection549: +; O1-NEXT: .Lpcsection594: ; O1-NEXT: jne .LBB237_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -14782,22 +14917,22 @@ define void @atomic128_add_seq_cst(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection544: +; O2-NEXT: .Lpcsection589: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection545: +; O2-NEXT: .Lpcsection590: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB237_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection546: +; O2-NEXT: .Lpcsection591: ; O2-NEXT: addq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection547: +; O2-NEXT: .Lpcsection592: ; O2-NEXT: adcq $0, %rcx -; O2-NEXT: .Lpcsection548: +; O2-NEXT: .Lpcsection593: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection549: +; O2-NEXT: .Lpcsection594: ; O2-NEXT: jne .LBB237_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -14811,22 +14946,22 @@ define void @atomic128_add_seq_cst(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection544: +; O3-NEXT: .Lpcsection589: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection545: +; O3-NEXT: .Lpcsection590: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB237_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection546: +; O3-NEXT: .Lpcsection591: ; O3-NEXT: addq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: 
.Lpcsection547: +; O3-NEXT: .Lpcsection592: ; O3-NEXT: adcq $0, %rcx -; O3-NEXT: .Lpcsection548: +; O3-NEXT: .Lpcsection593: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection549: +; O3-NEXT: .Lpcsection594: ; O3-NEXT: jne .LBB237_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -14886,22 +15021,22 @@ define void @atomic128_sub_seq_cst(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection550: +; O1-NEXT: .Lpcsection595: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection551: +; O1-NEXT: .Lpcsection596: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB238_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection552: +; O1-NEXT: .Lpcsection597: ; O1-NEXT: addq $-42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection553: +; O1-NEXT: .Lpcsection598: ; O1-NEXT: adcq $-1, %rcx -; O1-NEXT: .Lpcsection554: +; O1-NEXT: .Lpcsection599: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection555: +; O1-NEXT: .Lpcsection600: ; O1-NEXT: jne .LBB238_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -14915,22 +15050,22 @@ define void @atomic128_sub_seq_cst(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection550: +; O2-NEXT: .Lpcsection595: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection551: +; O2-NEXT: .Lpcsection596: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB238_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection552: +; O2-NEXT: .Lpcsection597: ; O2-NEXT: addq $-42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection553: +; O2-NEXT: .Lpcsection598: ; O2-NEXT: adcq $-1, %rcx -; O2-NEXT: .Lpcsection554: +; O2-NEXT: .Lpcsection599: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: 
.Lpcsection555: +; O2-NEXT: .Lpcsection600: ; O2-NEXT: jne .LBB238_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -14944,22 +15079,22 @@ define void @atomic128_sub_seq_cst(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection550: +; O3-NEXT: .Lpcsection595: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection551: +; O3-NEXT: .Lpcsection596: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB238_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection552: +; O3-NEXT: .Lpcsection597: ; O3-NEXT: addq $-42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection553: +; O3-NEXT: .Lpcsection598: ; O3-NEXT: adcq $-1, %rcx -; O3-NEXT: .Lpcsection554: +; O3-NEXT: .Lpcsection599: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection555: +; O3-NEXT: .Lpcsection600: ; O3-NEXT: jne .LBB238_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -15021,21 +15156,21 @@ define void @atomic128_and_seq_cst(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection556: +; O1-NEXT: .Lpcsection601: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection557: +; O1-NEXT: .Lpcsection602: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB239_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ebx -; O1-NEXT: .Lpcsection558: +; O1-NEXT: .Lpcsection603: ; O1-NEXT: andl $42, %ebx -; O1-NEXT: .Lpcsection559: +; O1-NEXT: .Lpcsection604: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection560: +; O1-NEXT: .Lpcsection605: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection561: +; O1-NEXT: .Lpcsection606: ; O1-NEXT: jne .LBB239_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -15049,21 +15184,21 @@ define void 
@atomic128_and_seq_cst(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection556: +; O2-NEXT: .Lpcsection601: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection557: +; O2-NEXT: .Lpcsection602: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB239_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ebx -; O2-NEXT: .Lpcsection558: +; O2-NEXT: .Lpcsection603: ; O2-NEXT: andl $42, %ebx -; O2-NEXT: .Lpcsection559: +; O2-NEXT: .Lpcsection604: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection560: +; O2-NEXT: .Lpcsection605: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection561: +; O2-NEXT: .Lpcsection606: ; O2-NEXT: jne .LBB239_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -15077,21 +15212,21 @@ define void @atomic128_and_seq_cst(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection556: +; O3-NEXT: .Lpcsection601: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection557: +; O3-NEXT: .Lpcsection602: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB239_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ebx -; O3-NEXT: .Lpcsection558: +; O3-NEXT: .Lpcsection603: ; O3-NEXT: andl $42, %ebx -; O3-NEXT: .Lpcsection559: +; O3-NEXT: .Lpcsection604: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection560: +; O3-NEXT: .Lpcsection605: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection561: +; O3-NEXT: .Lpcsection606: ; O3-NEXT: jne .LBB239_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -15149,20 +15284,20 @@ define void @atomic128_or_seq_cst(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection562: +; O1-NEXT: .Lpcsection607: ; O1-NEXT: movq (%rdi), %rax -; 
O1-NEXT: .Lpcsection563: +; O1-NEXT: .Lpcsection608: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB240_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection564: +; O1-NEXT: .Lpcsection609: ; O1-NEXT: orq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection565: +; O1-NEXT: .Lpcsection610: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection566: +; O1-NEXT: .Lpcsection611: ; O1-NEXT: jne .LBB240_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -15176,20 +15311,20 @@ define void @atomic128_or_seq_cst(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection562: +; O2-NEXT: .Lpcsection607: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection563: +; O2-NEXT: .Lpcsection608: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB240_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection564: +; O2-NEXT: .Lpcsection609: ; O2-NEXT: orq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection565: +; O2-NEXT: .Lpcsection610: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection566: +; O2-NEXT: .Lpcsection611: ; O2-NEXT: jne .LBB240_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -15203,20 +15338,20 @@ define void @atomic128_or_seq_cst(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection562: +; O3-NEXT: .Lpcsection607: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection563: +; O3-NEXT: .Lpcsection608: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB240_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection564: +; O3-NEXT: .Lpcsection609: ; O3-NEXT: orq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; 
O3-NEXT: .Lpcsection565: +; O3-NEXT: .Lpcsection610: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection566: +; O3-NEXT: .Lpcsection611: ; O3-NEXT: jne .LBB240_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -15274,20 +15409,20 @@ define void @atomic128_xor_seq_cst(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection567: +; O1-NEXT: .Lpcsection612: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection568: +; O1-NEXT: .Lpcsection613: ; O1-NEXT: movq 8(%rdi), %rdx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB241_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movq %rax, %rbx -; O1-NEXT: .Lpcsection569: +; O1-NEXT: .Lpcsection614: ; O1-NEXT: xorq $42, %rbx ; O1-NEXT: movq %rdx, %rcx -; O1-NEXT: .Lpcsection570: +; O1-NEXT: .Lpcsection615: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection571: +; O1-NEXT: .Lpcsection616: ; O1-NEXT: jne .LBB241_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -15301,20 +15436,20 @@ define void @atomic128_xor_seq_cst(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection567: +; O2-NEXT: .Lpcsection612: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection568: +; O2-NEXT: .Lpcsection613: ; O2-NEXT: movq 8(%rdi), %rdx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB241_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movq %rax, %rbx -; O2-NEXT: .Lpcsection569: +; O2-NEXT: .Lpcsection614: ; O2-NEXT: xorq $42, %rbx ; O2-NEXT: movq %rdx, %rcx -; O2-NEXT: .Lpcsection570: +; O2-NEXT: .Lpcsection615: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection571: +; O2-NEXT: .Lpcsection616: ; O2-NEXT: jne .LBB241_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -15328,20 +15463,20 @@ define void @atomic128_xor_seq_cst(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 
16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection567: +; O3-NEXT: .Lpcsection612: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection568: +; O3-NEXT: .Lpcsection613: ; O3-NEXT: movq 8(%rdi), %rdx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB241_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movq %rax, %rbx -; O3-NEXT: .Lpcsection569: +; O3-NEXT: .Lpcsection614: ; O3-NEXT: xorq $42, %rbx ; O3-NEXT: movq %rdx, %rcx -; O3-NEXT: .Lpcsection570: +; O3-NEXT: .Lpcsection615: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection571: +; O3-NEXT: .Lpcsection616: ; O3-NEXT: jne .LBB241_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -15405,23 +15540,23 @@ define void @atomic128_nand_seq_cst(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection572: +; O1-NEXT: .Lpcsection617: ; O1-NEXT: movq (%rdi), %rax -; O1-NEXT: .Lpcsection573: +; O1-NEXT: .Lpcsection618: ; O1-NEXT: movq 8(%rdi), %rdx -; O1-NEXT: .Lpcsection574: +; O1-NEXT: .Lpcsection619: ; O1-NEXT: movq $-1, %rcx ; O1-NEXT: .p2align 4, 0x90 ; O1-NEXT: .LBB242_1: # %atomicrmw.start ; O1-NEXT: # =>This Inner Loop Header: Depth=1 ; O1-NEXT: movl %eax, %ebx -; O1-NEXT: .Lpcsection575: +; O1-NEXT: .Lpcsection620: ; O1-NEXT: notl %ebx -; O1-NEXT: .Lpcsection576: +; O1-NEXT: .Lpcsection621: ; O1-NEXT: orq $-43, %rbx -; O1-NEXT: .Lpcsection577: +; O1-NEXT: .Lpcsection622: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection578: +; O1-NEXT: .Lpcsection623: ; O1-NEXT: jne .LBB242_1 ; O1-NEXT: # %bb.2: # %atomicrmw.end ; O1-NEXT: movq $1, foo(%rip) @@ -15435,23 +15570,23 @@ define void @atomic128_nand_seq_cst(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection572: +; O2-NEXT: .Lpcsection617: ; O2-NEXT: movq (%rdi), %rax -; O2-NEXT: .Lpcsection573: +; O2-NEXT: 
.Lpcsection618: ; O2-NEXT: movq 8(%rdi), %rdx -; O2-NEXT: .Lpcsection574: +; O2-NEXT: .Lpcsection619: ; O2-NEXT: movq $-1, %rcx ; O2-NEXT: .p2align 4, 0x90 ; O2-NEXT: .LBB242_1: # %atomicrmw.start ; O2-NEXT: # =>This Inner Loop Header: Depth=1 ; O2-NEXT: movl %eax, %ebx -; O2-NEXT: .Lpcsection575: +; O2-NEXT: .Lpcsection620: ; O2-NEXT: notl %ebx -; O2-NEXT: .Lpcsection576: +; O2-NEXT: .Lpcsection621: ; O2-NEXT: orq $-43, %rbx -; O2-NEXT: .Lpcsection577: +; O2-NEXT: .Lpcsection622: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection578: +; O2-NEXT: .Lpcsection623: ; O2-NEXT: jne .LBB242_1 ; O2-NEXT: # %bb.2: # %atomicrmw.end ; O2-NEXT: movq $1, foo(%rip) @@ -15465,23 +15600,23 @@ define void @atomic128_nand_seq_cst(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection572: +; O3-NEXT: .Lpcsection617: ; O3-NEXT: movq (%rdi), %rax -; O3-NEXT: .Lpcsection573: +; O3-NEXT: .Lpcsection618: ; O3-NEXT: movq 8(%rdi), %rdx -; O3-NEXT: .Lpcsection574: +; O3-NEXT: .Lpcsection619: ; O3-NEXT: movq $-1, %rcx ; O3-NEXT: .p2align 4, 0x90 ; O3-NEXT: .LBB242_1: # %atomicrmw.start ; O3-NEXT: # =>This Inner Loop Header: Depth=1 ; O3-NEXT: movl %eax, %ebx -; O3-NEXT: .Lpcsection575: +; O3-NEXT: .Lpcsection620: ; O3-NEXT: notl %ebx -; O3-NEXT: .Lpcsection576: +; O3-NEXT: .Lpcsection621: ; O3-NEXT: orq $-43, %rbx -; O3-NEXT: .Lpcsection577: +; O3-NEXT: .Lpcsection622: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection578: +; O3-NEXT: .Lpcsection623: ; O3-NEXT: jne .LBB242_1 ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) @@ -15542,31 +15677,31 @@ define void @atomic128_cas_monotonic(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection579: +; O1-NEXT: .Lpcsection624: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection580: +; O1-NEXT: .Lpcsection625: ; O1-NEXT: movl $1, %ebx -; O1-NEXT: .Lpcsection581: +; 
O1-NEXT: .Lpcsection626: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection582: +; O1-NEXT: .Lpcsection627: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection583: +; O1-NEXT: .Lpcsection628: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection584: +; O1-NEXT: .Lpcsection629: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection585: +; O1-NEXT: .Lpcsection630: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection586: +; O1-NEXT: .Lpcsection631: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection587: +; O1-NEXT: .Lpcsection632: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection588: +; O1-NEXT: .Lpcsection633: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection589: +; O1-NEXT: .Lpcsection634: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection590: +; O1-NEXT: .Lpcsection635: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection591: +; O1-NEXT: .Lpcsection636: ; O1-NEXT: lock cmpxchg16b (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: popq %rbx @@ -15579,31 +15714,31 @@ define void @atomic128_cas_monotonic(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection579: +; O2-NEXT: .Lpcsection624: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection580: +; O2-NEXT: .Lpcsection625: ; O2-NEXT: movl $1, %ebx -; O2-NEXT: .Lpcsection581: +; O2-NEXT: .Lpcsection626: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection582: +; O2-NEXT: .Lpcsection627: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection583: +; O2-NEXT: .Lpcsection628: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection584: +; O2-NEXT: .Lpcsection629: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection585: +; O2-NEXT: .Lpcsection630: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection586: +; O2-NEXT: .Lpcsection631: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection587: +; O2-NEXT: .Lpcsection632: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection588: +; O2-NEXT: .Lpcsection633: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection589: +; 
O2-NEXT: .Lpcsection634: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection590: +; O2-NEXT: .Lpcsection635: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection591: +; O2-NEXT: .Lpcsection636: ; O2-NEXT: lock cmpxchg16b (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: popq %rbx @@ -15616,31 +15751,31 @@ define void @atomic128_cas_monotonic(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection579: +; O3-NEXT: .Lpcsection624: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection580: +; O3-NEXT: .Lpcsection625: ; O3-NEXT: movl $1, %ebx -; O3-NEXT: .Lpcsection581: +; O3-NEXT: .Lpcsection626: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection582: +; O3-NEXT: .Lpcsection627: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection583: +; O3-NEXT: .Lpcsection628: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection584: +; O3-NEXT: .Lpcsection629: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection585: +; O3-NEXT: .Lpcsection630: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection586: +; O3-NEXT: .Lpcsection631: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection587: +; O3-NEXT: .Lpcsection632: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection588: +; O3-NEXT: .Lpcsection633: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection589: +; O3-NEXT: .Lpcsection634: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection590: +; O3-NEXT: .Lpcsection635: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection591: +; O3-NEXT: .Lpcsection636: ; O3-NEXT: lock cmpxchg16b (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: popq %rbx @@ -15702,31 +15837,31 @@ define void @atomic128_cas_acquire(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection592: +; O1-NEXT: .Lpcsection637: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection593: +; O1-NEXT: .Lpcsection638: ; O1-NEXT: movl $1, %ebx -; O1-NEXT: .Lpcsection594: +; O1-NEXT: .Lpcsection639: ; O1-NEXT: xorl 
%edx, %edx -; O1-NEXT: .Lpcsection595: +; O1-NEXT: .Lpcsection640: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection596: +; O1-NEXT: .Lpcsection641: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection597: +; O1-NEXT: .Lpcsection642: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection598: +; O1-NEXT: .Lpcsection643: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection599: +; O1-NEXT: .Lpcsection644: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection600: +; O1-NEXT: .Lpcsection645: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection601: +; O1-NEXT: .Lpcsection646: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection602: +; O1-NEXT: .Lpcsection647: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection603: +; O1-NEXT: .Lpcsection648: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection604: +; O1-NEXT: .Lpcsection649: ; O1-NEXT: lock cmpxchg16b (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: popq %rbx @@ -15739,31 +15874,31 @@ define void @atomic128_cas_acquire(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection592: +; O2-NEXT: .Lpcsection637: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection593: +; O2-NEXT: .Lpcsection638: ; O2-NEXT: movl $1, %ebx -; O2-NEXT: .Lpcsection594: +; O2-NEXT: .Lpcsection639: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection595: +; O2-NEXT: .Lpcsection640: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection596: +; O2-NEXT: .Lpcsection641: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection597: +; O2-NEXT: .Lpcsection642: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection598: +; O2-NEXT: .Lpcsection643: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection599: +; O2-NEXT: .Lpcsection644: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection600: +; O2-NEXT: .Lpcsection645: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection601: +; O2-NEXT: .Lpcsection646: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection602: +; O2-NEXT: .Lpcsection647: ; O2-NEXT: xorl %edx, 
%edx -; O2-NEXT: .Lpcsection603: +; O2-NEXT: .Lpcsection648: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection604: +; O2-NEXT: .Lpcsection649: ; O2-NEXT: lock cmpxchg16b (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: popq %rbx @@ -15776,31 +15911,31 @@ define void @atomic128_cas_acquire(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection592: +; O3-NEXT: .Lpcsection637: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection593: +; O3-NEXT: .Lpcsection638: ; O3-NEXT: movl $1, %ebx -; O3-NEXT: .Lpcsection594: +; O3-NEXT: .Lpcsection639: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection595: +; O3-NEXT: .Lpcsection640: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection596: +; O3-NEXT: .Lpcsection641: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection597: +; O3-NEXT: .Lpcsection642: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection598: +; O3-NEXT: .Lpcsection643: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection599: +; O3-NEXT: .Lpcsection644: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection600: +; O3-NEXT: .Lpcsection645: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection601: +; O3-NEXT: .Lpcsection646: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection602: +; O3-NEXT: .Lpcsection647: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection603: +; O3-NEXT: .Lpcsection648: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection604: +; O3-NEXT: .Lpcsection649: ; O3-NEXT: lock cmpxchg16b (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: popq %rbx @@ -15862,31 +15997,31 @@ define void @atomic128_cas_release(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection605: +; O1-NEXT: .Lpcsection650: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection606: +; O1-NEXT: .Lpcsection651: ; O1-NEXT: movl $1, %ebx -; O1-NEXT: .Lpcsection607: +; O1-NEXT: .Lpcsection652: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection608: +; 
O1-NEXT: .Lpcsection653: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection609: +; O1-NEXT: .Lpcsection654: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection610: +; O1-NEXT: .Lpcsection655: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection611: +; O1-NEXT: .Lpcsection656: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection612: +; O1-NEXT: .Lpcsection657: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection613: +; O1-NEXT: .Lpcsection658: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection614: +; O1-NEXT: .Lpcsection659: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection615: +; O1-NEXT: .Lpcsection660: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection616: +; O1-NEXT: .Lpcsection661: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection617: +; O1-NEXT: .Lpcsection662: ; O1-NEXT: lock cmpxchg16b (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: popq %rbx @@ -15899,31 +16034,31 @@ define void @atomic128_cas_release(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection605: +; O2-NEXT: .Lpcsection650: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection606: +; O2-NEXT: .Lpcsection651: ; O2-NEXT: movl $1, %ebx -; O2-NEXT: .Lpcsection607: +; O2-NEXT: .Lpcsection652: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection608: +; O2-NEXT: .Lpcsection653: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection609: +; O2-NEXT: .Lpcsection654: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection610: +; O2-NEXT: .Lpcsection655: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection611: +; O2-NEXT: .Lpcsection656: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection612: +; O2-NEXT: .Lpcsection657: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection613: +; O2-NEXT: .Lpcsection658: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection614: +; O2-NEXT: .Lpcsection659: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection615: +; O2-NEXT: .Lpcsection660: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection616: +; 
O2-NEXT: .Lpcsection661: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection617: +; O2-NEXT: .Lpcsection662: ; O2-NEXT: lock cmpxchg16b (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: popq %rbx @@ -15936,31 +16071,31 @@ define void @atomic128_cas_release(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection605: +; O3-NEXT: .Lpcsection650: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection606: +; O3-NEXT: .Lpcsection651: ; O3-NEXT: movl $1, %ebx -; O3-NEXT: .Lpcsection607: +; O3-NEXT: .Lpcsection652: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection608: +; O3-NEXT: .Lpcsection653: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection609: +; O3-NEXT: .Lpcsection654: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection610: +; O3-NEXT: .Lpcsection655: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection611: +; O3-NEXT: .Lpcsection656: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection612: +; O3-NEXT: .Lpcsection657: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection613: +; O3-NEXT: .Lpcsection658: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection614: +; O3-NEXT: .Lpcsection659: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection615: +; O3-NEXT: .Lpcsection660: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection616: +; O3-NEXT: .Lpcsection661: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection617: +; O3-NEXT: .Lpcsection662: ; O3-NEXT: lock cmpxchg16b (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: popq %rbx @@ -16022,31 +16157,31 @@ define void @atomic128_cas_acq_rel(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection618: +; O1-NEXT: .Lpcsection663: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection619: +; O1-NEXT: .Lpcsection664: ; O1-NEXT: movl $1, %ebx -; O1-NEXT: .Lpcsection620: +; O1-NEXT: .Lpcsection665: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection621: +; O1-NEXT: .Lpcsection666: ; O1-NEXT: xorl 
%ecx, %ecx -; O1-NEXT: .Lpcsection622: +; O1-NEXT: .Lpcsection667: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection623: +; O1-NEXT: .Lpcsection668: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection624: +; O1-NEXT: .Lpcsection669: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection625: +; O1-NEXT: .Lpcsection670: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection626: +; O1-NEXT: .Lpcsection671: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection627: +; O1-NEXT: .Lpcsection672: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection628: +; O1-NEXT: .Lpcsection673: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection629: +; O1-NEXT: .Lpcsection674: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection630: +; O1-NEXT: .Lpcsection675: ; O1-NEXT: lock cmpxchg16b (%rdi) ; O1-NEXT: movq $1, foo(%rip) ; O1-NEXT: popq %rbx @@ -16059,31 +16194,31 @@ define void @atomic128_cas_acq_rel(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection618: +; O2-NEXT: .Lpcsection663: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection619: +; O2-NEXT: .Lpcsection664: ; O2-NEXT: movl $1, %ebx -; O2-NEXT: .Lpcsection620: +; O2-NEXT: .Lpcsection665: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection621: +; O2-NEXT: .Lpcsection666: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection622: +; O2-NEXT: .Lpcsection667: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection623: +; O2-NEXT: .Lpcsection668: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection624: +; O2-NEXT: .Lpcsection669: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection625: +; O2-NEXT: .Lpcsection670: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection626: +; O2-NEXT: .Lpcsection671: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection627: +; O2-NEXT: .Lpcsection672: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection628: +; O2-NEXT: .Lpcsection673: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection629: +; O2-NEXT: .Lpcsection674: ; O2-NEXT: xorl %ecx, 
%ecx -; O2-NEXT: .Lpcsection630: +; O2-NEXT: .Lpcsection675: ; O2-NEXT: lock cmpxchg16b (%rdi) ; O2-NEXT: movq $1, foo(%rip) ; O2-NEXT: popq %rbx @@ -16096,31 +16231,31 @@ define void @atomic128_cas_acq_rel(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection618: +; O3-NEXT: .Lpcsection663: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection619: +; O3-NEXT: .Lpcsection664: ; O3-NEXT: movl $1, %ebx -; O3-NEXT: .Lpcsection620: +; O3-NEXT: .Lpcsection665: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection621: +; O3-NEXT: .Lpcsection666: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection622: +; O3-NEXT: .Lpcsection667: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection623: +; O3-NEXT: .Lpcsection668: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection624: +; O3-NEXT: .Lpcsection669: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection625: +; O3-NEXT: .Lpcsection670: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection626: +; O3-NEXT: .Lpcsection671: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection627: +; O3-NEXT: .Lpcsection672: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection628: +; O3-NEXT: .Lpcsection673: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection629: +; O3-NEXT: .Lpcsection674: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection630: +; O3-NEXT: .Lpcsection675: ; O3-NEXT: lock cmpxchg16b (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: popq %rbx @@ -16182,31 +16317,31 @@ define void @atomic128_cas_seq_cst(ptr %a) { ; O1-NEXT: .cfi_def_cfa_offset 16 ; O1-NEXT: .cfi_offset %rbx, -16 ; O1-NEXT: movq foo(%rip), %rax -; O1-NEXT: .Lpcsection631: +; O1-NEXT: .Lpcsection676: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection632: +; O1-NEXT: .Lpcsection677: ; O1-NEXT: movl $1, %ebx -; O1-NEXT: .Lpcsection633: +; O1-NEXT: .Lpcsection678: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection634: +; O1-NEXT: .Lpcsection679: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection635: +; 
O1-NEXT: .Lpcsection680: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection636: +; O1-NEXT: .Lpcsection681: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection637: +; O1-NEXT: .Lpcsection682: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection638: +; O1-NEXT: .Lpcsection683: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection639: +; O1-NEXT: .Lpcsection684: ; O1-NEXT: lock cmpxchg16b (%rdi) -; O1-NEXT: .Lpcsection640: +; O1-NEXT: .Lpcsection685: ; O1-NEXT: movl $42, %eax -; O1-NEXT: .Lpcsection641: +; O1-NEXT: .Lpcsection686: ; O1-NEXT: xorl %edx, %edx -; O1-NEXT: .Lpcsection642: +; O1-NEXT: .Lpcsection687: ; O1-NEXT: xorl %ecx, %ecx -; O1-NEXT: .Lpcsection643: +; O1-NEXT: .Lpcsection688: ; O1-NEXT: lock cmpxchg16b (%rdi) ; O1-NEXT: movq $3, foo(%rip) ; O1-NEXT: popq %rbx @@ -16219,31 +16354,31 @@ define void @atomic128_cas_seq_cst(ptr %a) { ; O2-NEXT: .cfi_def_cfa_offset 16 ; O2-NEXT: .cfi_offset %rbx, -16 ; O2-NEXT: movq foo(%rip), %rax -; O2-NEXT: .Lpcsection631: +; O2-NEXT: .Lpcsection676: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection632: +; O2-NEXT: .Lpcsection677: ; O2-NEXT: movl $1, %ebx -; O2-NEXT: .Lpcsection633: +; O2-NEXT: .Lpcsection678: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection634: +; O2-NEXT: .Lpcsection679: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection635: +; O2-NEXT: .Lpcsection680: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection636: +; O2-NEXT: .Lpcsection681: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection637: +; O2-NEXT: .Lpcsection682: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection638: +; O2-NEXT: .Lpcsection683: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection639: +; O2-NEXT: .Lpcsection684: ; O2-NEXT: lock cmpxchg16b (%rdi) -; O2-NEXT: .Lpcsection640: +; O2-NEXT: .Lpcsection685: ; O2-NEXT: movl $42, %eax -; O2-NEXT: .Lpcsection641: +; O2-NEXT: .Lpcsection686: ; O2-NEXT: xorl %edx, %edx -; O2-NEXT: .Lpcsection642: +; O2-NEXT: .Lpcsection687: ; O2-NEXT: xorl %ecx, %ecx -; O2-NEXT: .Lpcsection643: +; 
O2-NEXT: .Lpcsection688: ; O2-NEXT: lock cmpxchg16b (%rdi) ; O2-NEXT: movq $3, foo(%rip) ; O2-NEXT: popq %rbx @@ -16256,31 +16391,31 @@ define void @atomic128_cas_seq_cst(ptr %a) { ; O3-NEXT: .cfi_def_cfa_offset 16 ; O3-NEXT: .cfi_offset %rbx, -16 ; O3-NEXT: movq foo(%rip), %rax -; O3-NEXT: .Lpcsection631: +; O3-NEXT: .Lpcsection676: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection632: +; O3-NEXT: .Lpcsection677: ; O3-NEXT: movl $1, %ebx -; O3-NEXT: .Lpcsection633: +; O3-NEXT: .Lpcsection678: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection634: +; O3-NEXT: .Lpcsection679: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection635: +; O3-NEXT: .Lpcsection680: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection636: +; O3-NEXT: .Lpcsection681: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection637: +; O3-NEXT: .Lpcsection682: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection638: +; O3-NEXT: .Lpcsection683: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection639: +; O3-NEXT: .Lpcsection684: ; O3-NEXT: lock cmpxchg16b (%rdi) -; O3-NEXT: .Lpcsection640: +; O3-NEXT: .Lpcsection685: ; O3-NEXT: movl $42, %eax -; O3-NEXT: .Lpcsection641: +; O3-NEXT: .Lpcsection686: ; O3-NEXT: xorl %edx, %edx -; O3-NEXT: .Lpcsection642: +; O3-NEXT: .Lpcsection687: ; O3-NEXT: xorl %ecx, %ecx -; O3-NEXT: .Lpcsection643: +; O3-NEXT: .Lpcsection688: ; O3-NEXT: lock cmpxchg16b (%rdi) ; O3-NEXT: movq $3, foo(%rip) ; O3-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/physreg-pairs.ll b/llvm/test/CodeGen/X86/physreg-pairs.ll index 5e1d430311a64..07ee803709caa 100644 --- a/llvm/test/CodeGen/X86/physreg-pairs.ll +++ b/llvm/test/CodeGen/X86/physreg-pairs.ll @@ -145,8 +145,8 @@ define dso_local i64 @test_ebp(i64 %in) local_unnamed_addr nounwind { ; CHECK-LABEL: test_ebp: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushl %ebp -; CHECK-NEXT: movl $19088743, %esp # imm = 0x1234567 ; CHECK-NEXT: movl $-1985229329, %ebp # imm = 0x89ABCDEF +; CHECK-NEXT: movl $19088743, %esp # imm = 0x1234567 ; CHECK-NEXT: 
#APP ; CHECK-NEXT: movl %ebp, %eax ; CHECK-NEXT: #NO_APP diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll index 5ed14ab6e0b97..a0879ad930a30 100644 --- a/llvm/test/CodeGen/X86/popcnt.ll +++ b/llvm/test/CodeGen/X86/popcnt.ll @@ -1044,12 +1044,11 @@ define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 { ; X86-NEXT: shrl %ecx ; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: movl $858993459, %ecx # imm = 0x33333333 -; X86-NEXT: movl %eax, %edx -; X86-NEXT: andl %ecx, %edx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 ; X86-NEXT: shrl $2, %eax -; X86-NEXT: andl %ecx, %eax -; X86-NEXT: addl %edx, %eax +; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shrl $4, %ecx ; X86-NEXT: addl %eax, %ecx @@ -1064,12 +1063,11 @@ define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 { ; X64-NEXT: shrl %eax ; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555 ; X64-NEXT: subl %eax, %edi -; X64-NEXT: movl $858993459, %eax # imm = 0x33333333 -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: andl %eax, %ecx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $858993459, %eax # imm = 0x33333333 ; X64-NEXT: shrl $2, %edi -; X64-NEXT: andl %eax, %edi -; X64-NEXT: addl %ecx, %edi +; X64-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X64-NEXT: addl %eax, %edi ; X64-NEXT: movl %edi, %eax ; X64-NEXT: shrl $4, %eax ; X64-NEXT: addl %edi, %eax @@ -1094,49 +1092,40 @@ define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 { define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 { ; X86-NOSSE-LABEL: cnt64_pgso: ; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %ebx -; X86-NOSSE-NEXT: pushl %edi -; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: movl %esi, %ecx -; X86-NOSSE-NEXT: shrl %ecx -; X86-NOSSE-NEXT: movl 
$1431655765, %edx # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %edx, %ecx -; X86-NOSSE-NEXT: subl %ecx, %esi -; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 -; X86-NOSSE-NEXT: movl %esi, %edi -; X86-NOSSE-NEXT: andl %ecx, %edi -; X86-NOSSE-NEXT: shrl $2, %esi -; X86-NOSSE-NEXT: andl %ecx, %esi -; X86-NOSSE-NEXT: addl %edi, %esi -; X86-NOSSE-NEXT: movl %esi, %ebx -; X86-NOSSE-NEXT: shrl $4, %ebx -; X86-NOSSE-NEXT: addl %esi, %ebx -; X86-NOSSE-NEXT: movl $252645135, %edi # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: andl %edi, %ebx -; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %esi -; X86-NOSSE-NEXT: movl %eax, %ebx -; X86-NOSSE-NEXT: shrl %ebx -; X86-NOSSE-NEXT: andl %edx, %ebx -; X86-NOSSE-NEXT: subl %ebx, %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, %edx +; X86-NOSSE-NEXT: shrl %edx +; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %edx, %ecx +; X86-NOSSE-NEXT: movl %ecx, %edx +; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-NOSSE-NEXT: shrl $2, %ecx +; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %edx, %ecx +; X86-NOSSE-NEXT: movl %ecx, %edx +; X86-NOSSE-NEXT: shrl $4, %edx +; X86-NOSSE-NEXT: addl %ecx, %edx +; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %ecx ; X86-NOSSE-NEXT: movl %eax, %edx -; X86-NOSSE-NEXT: andl %ecx, %edx +; X86-NOSSE-NEXT: shrl %edx +; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %edx, %eax +; X86-NOSSE-NEXT: movl %eax, %edx +; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 ; X86-NOSSE-NEXT: shrl $2, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax +; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333 ; X86-NOSSE-NEXT: addl %edx, %eax -; X86-NOSSE-NEXT: movl %eax, %ecx -; X86-NOSSE-NEXT: shrl $4, %ecx -; X86-NOSSE-NEXT: 
addl %eax, %ecx -; X86-NOSSE-NEXT: andl %edi, %ecx -; X86-NOSSE-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 +; X86-NOSSE-NEXT: movl %eax, %edx +; X86-NOSSE-NEXT: shrl $4, %edx +; X86-NOSSE-NEXT: addl %eax, %edx +; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 ; X86-NOSSE-NEXT: shrl $24, %eax -; X86-NOSSE-NEXT: addl %esi, %eax +; X86-NOSSE-NEXT: addl %ecx, %eax ; X86-NOSSE-NEXT: xorl %edx, %edx -; X86-NOSSE-NEXT: popl %esi -; X86-NOSSE-NEXT: popl %edi -; X86-NOSSE-NEXT: popl %ebx ; X86-NOSSE-NEXT: retl ; ; X64-LABEL: cnt64_pgso: @@ -1223,92 +1212,85 @@ define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 { define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { ; X86-NOSSE-LABEL: cnt128_pgso: ; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: pushl %ebx ; X86-NOSSE-NEXT: pushl %edi ; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NOSSE-NEXT: movl %ebx, %ecx -; X86-NOSSE-NEXT: shrl %ecx -; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %edi, %ecx -; X86-NOSSE-NEXT: subl %ecx, %ebx -; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 -; X86-NOSSE-NEXT: movl %ebx, %ebp -; X86-NOSSE-NEXT: andl %ecx, %ebp -; X86-NOSSE-NEXT: shrl $2, %ebx -; X86-NOSSE-NEXT: andl %ecx, %ebx -; X86-NOSSE-NEXT: addl %ebp, %ebx -; X86-NOSSE-NEXT: movl %ebx, %ebp -; X86-NOSSE-NEXT: shrl $4, %ebp -; X86-NOSSE-NEXT: addl %ebx, %ebp -; X86-NOSSE-NEXT: movl %eax, %ebx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOSSE-NEXT: movl %edi, %ebx ; X86-NOSSE-NEXT: shrl %ebx -; X86-NOSSE-NEXT: andl %edi, %ebx -; X86-NOSSE-NEXT: subl %ebx, %eax -; X86-NOSSE-NEXT: movl %eax, %ebx 
-; X86-NOSSE-NEXT: andl %ecx, %ebx -; X86-NOSSE-NEXT: shrl $2, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax -; X86-NOSSE-NEXT: addl %ebx, %eax -; X86-NOSSE-NEXT: movl %eax, %edi -; X86-NOSSE-NEXT: shrl $4, %edi -; X86-NOSSE-NEXT: addl %eax, %edi -; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: andl %ebx, %ebp -; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %eax -; X86-NOSSE-NEXT: andl %ebx, %edi -; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 +; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %ebx, %edi +; X86-NOSSE-NEXT: movl %edi, %ebx +; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 +; X86-NOSSE-NEXT: shrl $2, %edi +; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %ebx, %edi +; X86-NOSSE-NEXT: movl %edi, %ebx +; X86-NOSSE-NEXT: shrl $4, %ebx +; X86-NOSSE-NEXT: addl %edi, %ebx +; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101 ; X86-NOSSE-NEXT: shrl $24, %edi -; X86-NOSSE-NEXT: addl %eax, %edi -; X86-NOSSE-NEXT: movl %esi, %eax -; X86-NOSSE-NEXT: shrl %eax -; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %ebp, %eax -; X86-NOSSE-NEXT: subl %eax, %esi -; X86-NOSSE-NEXT: movl %esi, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax +; X86-NOSSE-NEXT: movl %esi, %ebx +; X86-NOSSE-NEXT: shrl %ebx +; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %ebx, %esi +; X86-NOSSE-NEXT: movl %esi, %ebx +; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 ; X86-NOSSE-NEXT: shrl $2, %esi -; X86-NOSSE-NEXT: andl %ecx, %esi -; X86-NOSSE-NEXT: addl %eax, %esi -; X86-NOSSE-NEXT: movl %esi, %ebp -; X86-NOSSE-NEXT: shrl $4, %ebp -; X86-NOSSE-NEXT: addl %esi, %ebp -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: shrl %eax -; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 
-; X86-NOSSE-NEXT: andl %esi, %eax -; X86-NOSSE-NEXT: subl %eax, %edx -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax +; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %ebx, %esi +; X86-NOSSE-NEXT: movl %esi, %ebx +; X86-NOSSE-NEXT: shrl $4, %ebx +; X86-NOSSE-NEXT: addl %esi, %ebx +; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %esi +; X86-NOSSE-NEXT: addl %edi, %esi +; X86-NOSSE-NEXT: movl %edx, %edi +; X86-NOSSE-NEXT: shrl %edi +; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %edi, %edx +; X86-NOSSE-NEXT: movl %edx, %edi +; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 ; X86-NOSSE-NEXT: shrl $2, %edx -; X86-NOSSE-NEXT: andl %ecx, %edx -; X86-NOSSE-NEXT: addl %eax, %edx -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: shrl $4, %eax -; X86-NOSSE-NEXT: addl %edx, %eax -; X86-NOSSE-NEXT: andl %ebx, %ebp -; X86-NOSSE-NEXT: andl %ebx, %eax -; X86-NOSSE-NEXT: imull $16843009, %ebp, %ecx # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %ecx -; X86-NOSSE-NEXT: imull $16843009, %eax, %edx # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %edx -; X86-NOSSE-NEXT: addl %ecx, %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 ; X86-NOSSE-NEXT: addl %edi, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: movl %ecx, 12(%eax) -; X86-NOSSE-NEXT: movl %ecx, 8(%eax) -; X86-NOSSE-NEXT: movl %ecx, 4(%eax) -; X86-NOSSE-NEXT: movl %edx, (%eax) +; X86-NOSSE-NEXT: movl %edx, %edi +; X86-NOSSE-NEXT: shrl $4, %edi +; X86-NOSSE-NEXT: addl %edx, %edi +; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %edx +; X86-NOSSE-NEXT: movl %ecx, %edi +; X86-NOSSE-NEXT: shrl %edi +; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 
0x55555555 +; X86-NOSSE-NEXT: subl %edi, %ecx +; X86-NOSSE-NEXT: movl %ecx, %edi +; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X86-NOSSE-NEXT: shrl $2, %ecx +; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %edi, %ecx +; X86-NOSSE-NEXT: movl %ecx, %edi +; X86-NOSSE-NEXT: shrl $4, %edi +; X86-NOSSE-NEXT: addl %ecx, %edi +; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %ecx +; X86-NOSSE-NEXT: addl %edx, %ecx +; X86-NOSSE-NEXT: addl %esi, %ecx +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: movl %edx, 12(%eax) +; X86-NOSSE-NEXT: movl %edx, 8(%eax) +; X86-NOSSE-NEXT: movl %edx, 4(%eax) +; X86-NOSSE-NEXT: movl %ecx, (%eax) ; X86-NOSSE-NEXT: popl %esi ; X86-NOSSE-NEXT: popl %edi ; X86-NOSSE-NEXT: popl %ebx -; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl $4 ; ; X64-LABEL: cnt128_pgso: diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index e9448a800fd95..c0bfb71e189cd 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -81,8 +81,8 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) { ; CHECK-NEXT: imulq $1040, %rdx, %rax ## imm = 0x410 ; CHECK-NEXT: movq _syBuf@GOTPCREL(%rip), %rcx ; CHECK-NEXT: leaq 8(%rcx,%rax), %rdx -; CHECK-NEXT: movl $1, %r13d ; CHECK-NEXT: movq _syCTRO@GOTPCREL(%rip), %rax +; CHECK-NEXT: movl $1, %r13d ; CHECK-NEXT: movb $1, %cl ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_9: ## %do.body diff --git a/llvm/test/CodeGen/X86/remat-phys-dead.ll b/llvm/test/CodeGen/X86/remat-phys-dead.ll index 09f2e4320b6d3..9a0a219869353 100644 --- a/llvm/test/CodeGen/X86/remat-phys-dead.ll +++ b/llvm/test/CodeGen/X86/remat-phys-dead.ll @@ -18,6 +18,6 @@ define i8 @test_remat() { define i32 @test_remat32() { ret i32 0 ; CHECK: REGISTER COALESCER -; CHECK: Remat: $eax = MOV32r0 
implicit-def dead $eflags +; CHECK: $eax = MOV32r0 implicit-def dead $eflags } diff --git a/llvm/test/CodeGen/X86/select_const_i128.ll b/llvm/test/CodeGen/X86/select_const_i128.ll index af38bd6ce9e3e..503cf13ec4131 100644 --- a/llvm/test/CodeGen/X86/select_const_i128.ll +++ b/llvm/test/CodeGen/X86/select_const_i128.ll @@ -9,10 +9,9 @@ define i128 @select_eq_i128(ptr %a) { ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ptest %xmm0, %xmm0 ; CHECK-NEXT: setne %al -; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: addq $-1, %rax ; CHECK-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: adcq %rcx, %rdx +; CHECK-NEXT: adcq $0, %rdx ; CHECK-NEXT: retq %1 = load i128, ptr %a, align 16 %cmp = icmp eq i128 %1, 1 diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll index 524ecf2aece7e..3d80c1554b6c3 100644 --- a/llvm/test/CodeGen/X86/shrink_vmul.ll +++ b/llvm/test/CodeGen/X86/shrink_vmul.ll @@ -1986,29 +1986,29 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind { ; X86-SSE-NEXT: movdqa (%eax), %xmm3 ; X86-SSE-NEXT: movdqa (%ecx), %xmm0 ; X86-SSE-NEXT: movdqa 16(%ecx), %xmm1 -; X86-SSE-NEXT: pxor %xmm4, %xmm4 +; X86-SSE-NEXT: pxor %xmm5, %xmm5 ; X86-SSE-NEXT: movdqa %xmm3, %xmm2 ; X86-SSE-NEXT: pextrw $7, %xmm3, %eax ; X86-SSE-NEXT: pextrw $4, %xmm3, %edi ; X86-SSE-NEXT: pextrw $0, %xmm3, %ebp ; X86-SSE-NEXT: pextrw $1, %xmm3, %esi ; X86-SSE-NEXT: pextrw $3, %xmm3, %ebx -; X86-SSE-NEXT: movdqa %xmm3, %xmm5 -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] -; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] +; X86-SSE-NEXT: movdqa %xmm3, %xmm4 +; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7] ; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] ; 
X86-SSE-NEXT: movd %xmm3, %ecx ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %ecx ; X86-SSE-NEXT: movd %edx, %xmm3 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3] -; X86-SSE-NEXT: movd %xmm4, %eax -; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3] -; X86-SSE-NEXT: movd %xmm4, %ecx +; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3] +; X86-SSE-NEXT: movd %xmm5, %eax +; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3] +; X86-SSE-NEXT: movd %xmm5, %ecx ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %ecx -; X86-SSE-NEXT: movd %edx, %xmm4 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; X86-SSE-NEXT: movd %edx, %xmm5 +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] ; X86-SSE-NEXT: movl %edi, %eax ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -2022,7 +2022,7 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind { ; X86-SSE-NEXT: divl %ecx ; X86-SSE-NEXT: movd %edx, %xmm1 ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0] ; X86-SSE-NEXT: movl %ebp, %eax ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl (%edi) @@ -2040,7 +2040,7 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind { ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %ecx ; X86-SSE-NEXT: movd %edx, %xmm2 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,2,3] +; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3] ; X86-SSE-NEXT: movd %xmm4, %eax ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X86-SSE-NEXT: movd %xmm0, %ecx @@ -2207,29 +2207,29 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind { ; X64-SSE-NEXT: movdqa (%rdi), %xmm3 ; X64-SSE-NEXT: movdqa (%rsi), %xmm0 ; X64-SSE-NEXT: movdqa 16(%rsi), %xmm1 -; X64-SSE-NEXT: pxor %xmm4, %xmm4 +; X64-SSE-NEXT: pxor %xmm5, %xmm5 ; X64-SSE-NEXT: movdqa %xmm3, %xmm2 ; X64-SSE-NEXT: pextrw $7, %xmm3, %eax ; 
X64-SSE-NEXT: pextrw $4, %xmm3, %r8d ; X64-SSE-NEXT: pextrw $0, %xmm3, %r10d ; X64-SSE-NEXT: pextrw $1, %xmm3, %edi ; X64-SSE-NEXT: pextrw $3, %xmm3, %r9d -; X64-SSE-NEXT: movdqa %xmm3, %xmm5 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] -; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] +; X64-SSE-NEXT: movdqa %xmm3, %xmm4 +; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7] ; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] ; X64-SSE-NEXT: movd %xmm3, %r11d ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %r11d ; X64-SSE-NEXT: movd %edx, %xmm3 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3] -; X64-SSE-NEXT: movd %xmm4, %eax -; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3] -; X64-SSE-NEXT: movd %xmm4, %r11d +; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3] +; X64-SSE-NEXT: movd %xmm5, %eax +; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3] +; X64-SSE-NEXT: movd %xmm5, %r11d ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %r11d -; X64-SSE-NEXT: movd %edx, %xmm4 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; X64-SSE-NEXT: movd %edx, %xmm5 +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] ; X64-SSE-NEXT: movl %r8d, %eax ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl 16(%rsi) @@ -2242,7 +2242,7 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind { ; X64-SSE-NEXT: divl %r8d ; X64-SSE-NEXT: movd %edx, %xmm1 ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0] ; X64-SSE-NEXT: movl %r10d, %eax ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl (%rsi) @@ -2260,7 +2260,7 
@@ define void @PR34947(ptr %p0, ptr %p1) nounwind { ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %edi ; X64-SSE-NEXT: movd %edx, %xmm2 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,2,3] +; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3] ; X64-SSE-NEXT: movd %xmm4, %eax ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-SSE-NEXT: movd %xmm0, %edi diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll b/llvm/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll index 3f2226aec2d3f..064812323d177 100644 --- a/llvm/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll +++ b/llvm/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll @@ -283,15 +283,14 @@ define i32 @test_call_setjmp(ptr%ptr) nounwind { ; X64-NOPIC-NEXT: pushq %rbp ; X64-NOPIC-NEXT: pushq %r15 ; X64-NOPIC-NEXT: pushq %r14 -; X64-NOPIC-NEXT: pushq %r13 ; X64-NOPIC-NEXT: pushq %r12 ; X64-NOPIC-NEXT: pushq %rbx -; X64-NOPIC-NEXT: subq $24, %rsp +; X64-NOPIC-NEXT: subq $16, %rsp ; X64-NOPIC-NEXT: movq %rsp, %rax ; X64-NOPIC-NEXT: movq %rdi, %rbx ; X64-NOPIC-NEXT: movq $-1, %r15 ; X64-NOPIC-NEXT: sarq $63, %rax -; X64-NOPIC-NEXT: leaq {{[0-9]+}}(%rsp), %r14 +; X64-NOPIC-NEXT: movq %rsp, %r14 ; X64-NOPIC-NEXT: shlq $47, %rax ; X64-NOPIC-NEXT: movq %r14, %rdi ; X64-NOPIC-NEXT: orq %rax, %rsp @@ -302,24 +301,23 @@ define i32 @test_call_setjmp(ptr%ptr) nounwind { ; X64-NOPIC-NEXT: sarq $63, %rax ; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr4, %r12 ; X64-NOPIC-NEXT: cmovneq %r15, %rax -; X64-NOPIC-NEXT: movl (%rbx), %r12d -; X64-NOPIC-NEXT: movl $42, %ebp +; X64-NOPIC-NEXT: movl (%rbx), %ebp ; X64-NOPIC-NEXT: shlq $47, %rax ; X64-NOPIC-NEXT: movq %r14, %rdi -; X64-NOPIC-NEXT: movl %ebp, %esi +; X64-NOPIC-NEXT: movl $42, %esi ; X64-NOPIC-NEXT: orq %rax, %rsp -; X64-NOPIC-NEXT: movq $.Lslh_ret_addr5, %r13 +; X64-NOPIC-NEXT: movq $.Lslh_ret_addr5, %r12 ; X64-NOPIC-NEXT: callq sigsetjmp@PLT ; X64-NOPIC-NEXT: .Lslh_ret_addr5: ; X64-NOPIC-NEXT: movq %rsp, %rax ; 
X64-NOPIC-NEXT: sarq $63, %rax -; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr5, %r13 +; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr5, %r12 ; X64-NOPIC-NEXT: cmovneq %r15, %rax -; X64-NOPIC-NEXT: addl (%rbx), %r12d +; X64-NOPIC-NEXT: addl (%rbx), %ebp ; X64-NOPIC-NEXT: shlq $47, %rax ; X64-NOPIC-NEXT: movq %r14, %rdi ; X64-NOPIC-NEXT: movq %r14, %rsi -; X64-NOPIC-NEXT: movl %ebp, %edx +; X64-NOPIC-NEXT: movl $42, %edx ; X64-NOPIC-NEXT: orq %rax, %rsp ; X64-NOPIC-NEXT: movq $.Lslh_ret_addr6, %r14 ; X64-NOPIC-NEXT: callq __sigsetjmp@PLT @@ -329,15 +327,14 @@ define i32 @test_call_setjmp(ptr%ptr) nounwind { ; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr6, %r14 ; X64-NOPIC-NEXT: movq %rax, %rcx ; X64-NOPIC-NEXT: cmovneq %r15, %rcx -; X64-NOPIC-NEXT: addl (%rbx), %r12d -; X64-NOPIC-NEXT: movl %r12d, %eax +; X64-NOPIC-NEXT: addl (%rbx), %ebp +; X64-NOPIC-NEXT: movl %ebp, %eax ; X64-NOPIC-NEXT: orl %ecx, %eax ; X64-NOPIC-NEXT: shlq $47, %rcx ; X64-NOPIC-NEXT: orq %rcx, %rsp -; X64-NOPIC-NEXT: addq $24, %rsp +; X64-NOPIC-NEXT: addq $16, %rsp ; X64-NOPIC-NEXT: popq %rbx ; X64-NOPIC-NEXT: popq %r12 -; X64-NOPIC-NEXT: popq %r13 ; X64-NOPIC-NEXT: popq %r14 ; X64-NOPIC-NEXT: popq %r15 ; X64-NOPIC-NEXT: popq %rbp @@ -348,15 +345,14 @@ define i32 @test_call_setjmp(ptr%ptr) nounwind { ; X64-NOPIC-MCM-NEXT: pushq %rbp ; X64-NOPIC-MCM-NEXT: pushq %r15 ; X64-NOPIC-MCM-NEXT: pushq %r14 -; X64-NOPIC-MCM-NEXT: pushq %r13 ; X64-NOPIC-MCM-NEXT: pushq %r12 ; X64-NOPIC-MCM-NEXT: pushq %rbx -; X64-NOPIC-MCM-NEXT: subq $24, %rsp +; X64-NOPIC-MCM-NEXT: subq $16, %rsp ; X64-NOPIC-MCM-NEXT: movq %rsp, %rax ; X64-NOPIC-MCM-NEXT: movq %rdi, %rbx ; X64-NOPIC-MCM-NEXT: movq $-1, %r15 ; X64-NOPIC-MCM-NEXT: sarq $63, %rax -; X64-NOPIC-MCM-NEXT: leaq {{[0-9]+}}(%rsp), %r14 +; X64-NOPIC-MCM-NEXT: movq %rsp, %r14 ; X64-NOPIC-MCM-NEXT: shlq $47, %rax ; X64-NOPIC-MCM-NEXT: movq %r14, %rdi ; X64-NOPIC-MCM-NEXT: orq %rax, %rsp @@ -368,25 +364,24 @@ define i32 @test_call_setjmp(ptr%ptr) nounwind { ; X64-NOPIC-MCM-NEXT: leaq 
.Lslh_ret_addr4(%rip), %rcx ; X64-NOPIC-MCM-NEXT: cmpq %rcx, %r12 ; X64-NOPIC-MCM-NEXT: cmovneq %r15, %rax -; X64-NOPIC-MCM-NEXT: movl (%rbx), %r12d -; X64-NOPIC-MCM-NEXT: movl $42, %ebp +; X64-NOPIC-MCM-NEXT: movl (%rbx), %ebp ; X64-NOPIC-MCM-NEXT: shlq $47, %rax ; X64-NOPIC-MCM-NEXT: movq %r14, %rdi -; X64-NOPIC-MCM-NEXT: movl %ebp, %esi +; X64-NOPIC-MCM-NEXT: movl $42, %esi ; X64-NOPIC-MCM-NEXT: orq %rax, %rsp -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr5(%rip), %r13 +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr5(%rip), %r12 ; X64-NOPIC-MCM-NEXT: callq sigsetjmp@PLT ; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr5: ; X64-NOPIC-MCM-NEXT: movq %rsp, %rax ; X64-NOPIC-MCM-NEXT: sarq $63, %rax ; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr5(%rip), %rcx -; X64-NOPIC-MCM-NEXT: cmpq %rcx, %r13 +; X64-NOPIC-MCM-NEXT: cmpq %rcx, %r12 ; X64-NOPIC-MCM-NEXT: cmovneq %r15, %rax -; X64-NOPIC-MCM-NEXT: addl (%rbx), %r12d +; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp ; X64-NOPIC-MCM-NEXT: shlq $47, %rax ; X64-NOPIC-MCM-NEXT: movq %r14, %rdi ; X64-NOPIC-MCM-NEXT: movq %r14, %rsi -; X64-NOPIC-MCM-NEXT: movl %ebp, %edx +; X64-NOPIC-MCM-NEXT: movl $42, %edx ; X64-NOPIC-MCM-NEXT: orq %rax, %rsp ; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr6(%rip), %r14 ; X64-NOPIC-MCM-NEXT: callq __sigsetjmp@PLT @@ -397,15 +392,14 @@ define i32 @test_call_setjmp(ptr%ptr) nounwind { ; X64-NOPIC-MCM-NEXT: cmpq %rcx, %r14 ; X64-NOPIC-MCM-NEXT: movq %rax, %rcx ; X64-NOPIC-MCM-NEXT: cmovneq %r15, %rcx -; X64-NOPIC-MCM-NEXT: addl (%rbx), %r12d -; X64-NOPIC-MCM-NEXT: movl %r12d, %eax +; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: movl %ebp, %eax ; X64-NOPIC-MCM-NEXT: orl %ecx, %eax ; X64-NOPIC-MCM-NEXT: shlq $47, %rcx ; X64-NOPIC-MCM-NEXT: orq %rcx, %rsp -; X64-NOPIC-MCM-NEXT: addq $24, %rsp +; X64-NOPIC-MCM-NEXT: addq $16, %rsp ; X64-NOPIC-MCM-NEXT: popq %rbx ; X64-NOPIC-MCM-NEXT: popq %r12 -; X64-NOPIC-MCM-NEXT: popq %r13 ; X64-NOPIC-MCM-NEXT: popq %r14 ; X64-NOPIC-MCM-NEXT: popq %r15 ; X64-NOPIC-MCM-NEXT: popq %rbp 
@@ -416,15 +410,14 @@ define i32 @test_call_setjmp(ptr%ptr) nounwind { ; X64-PIC-NEXT: pushq %rbp ; X64-PIC-NEXT: pushq %r15 ; X64-PIC-NEXT: pushq %r14 -; X64-PIC-NEXT: pushq %r13 ; X64-PIC-NEXT: pushq %r12 ; X64-PIC-NEXT: pushq %rbx -; X64-PIC-NEXT: subq $24, %rsp +; X64-PIC-NEXT: subq $16, %rsp ; X64-PIC-NEXT: movq %rsp, %rax ; X64-PIC-NEXT: movq %rdi, %rbx ; X64-PIC-NEXT: movq $-1, %r15 ; X64-PIC-NEXT: sarq $63, %rax -; X64-PIC-NEXT: leaq {{[0-9]+}}(%rsp), %r14 +; X64-PIC-NEXT: movq %rsp, %r14 ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: movq %r14, %rdi ; X64-PIC-NEXT: orq %rax, %rsp @@ -436,25 +429,24 @@ define i32 @test_call_setjmp(ptr%ptr) nounwind { ; X64-PIC-NEXT: leaq .Lslh_ret_addr4(%rip), %rcx ; X64-PIC-NEXT: cmpq %rcx, %r12 ; X64-PIC-NEXT: cmovneq %r15, %rax -; X64-PIC-NEXT: movl (%rbx), %r12d -; X64-PIC-NEXT: movl $42, %ebp +; X64-PIC-NEXT: movl (%rbx), %ebp ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: movq %r14, %rdi -; X64-PIC-NEXT: movl %ebp, %esi +; X64-PIC-NEXT: movl $42, %esi ; X64-PIC-NEXT: orq %rax, %rsp -; X64-PIC-NEXT: leaq .Lslh_ret_addr5(%rip), %r13 +; X64-PIC-NEXT: leaq .Lslh_ret_addr5(%rip), %r12 ; X64-PIC-NEXT: callq sigsetjmp@PLT ; X64-PIC-NEXT: .Lslh_ret_addr5: ; X64-PIC-NEXT: movq %rsp, %rax ; X64-PIC-NEXT: sarq $63, %rax ; X64-PIC-NEXT: leaq .Lslh_ret_addr5(%rip), %rcx -; X64-PIC-NEXT: cmpq %rcx, %r13 +; X64-PIC-NEXT: cmpq %rcx, %r12 ; X64-PIC-NEXT: cmovneq %r15, %rax -; X64-PIC-NEXT: addl (%rbx), %r12d +; X64-PIC-NEXT: addl (%rbx), %ebp ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: movq %r14, %rdi ; X64-PIC-NEXT: movq %r14, %rsi -; X64-PIC-NEXT: movl %ebp, %edx +; X64-PIC-NEXT: movl $42, %edx ; X64-PIC-NEXT: orq %rax, %rsp ; X64-PIC-NEXT: leaq .Lslh_ret_addr6(%rip), %r14 ; X64-PIC-NEXT: callq __sigsetjmp@PLT @@ -465,15 +457,14 @@ define i32 @test_call_setjmp(ptr%ptr) nounwind { ; X64-PIC-NEXT: cmpq %rcx, %r14 ; X64-PIC-NEXT: movq %rax, %rcx ; X64-PIC-NEXT: cmovneq %r15, %rcx -; X64-PIC-NEXT: addl (%rbx), %r12d -; X64-PIC-NEXT: 
movl %r12d, %eax +; X64-PIC-NEXT: addl (%rbx), %ebp +; X64-PIC-NEXT: movl %ebp, %eax ; X64-PIC-NEXT: orl %ecx, %eax ; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: orq %rcx, %rsp -; X64-PIC-NEXT: addq $24, %rsp +; X64-PIC-NEXT: addq $16, %rsp ; X64-PIC-NEXT: popq %rbx ; X64-PIC-NEXT: popq %r12 -; X64-PIC-NEXT: popq %r13 ; X64-PIC-NEXT: popq %r14 ; X64-PIC-NEXT: popq %r15 ; X64-PIC-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll index 6d77e04504e2d..5814146a54613 100644 --- a/llvm/test/CodeGen/X86/swifterror.ll +++ b/llvm/test/CodeGen/X86/swifterror.ll @@ -1566,11 +1566,11 @@ define swiftcc { i64, i64, i64, i64} @params_and_return_in_reg(i64, i64, i64, i6 ; CHECK-APPLE-NEXT: .cfi_offset %r14, -32 ; CHECK-APPLE-NEXT: .cfi_offset %r15, -24 ; CHECK-APPLE-NEXT: .cfi_offset %rbp, -16 -; CHECK-APPLE-NEXT: movq %r12, %rbx -; CHECK-APPLE-NEXT: movq %r13, (%rsp) ## 8-byte Spill +; CHECK-APPLE-NEXT: movq %r12, (%rsp) ## 8-byte Spill +; CHECK-APPLE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-APPLE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-APPLE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-APPLE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-APPLE-NEXT: movq %rcx, %rbx ; CHECK-APPLE-NEXT: movq %rdx, %r14 ; CHECK-APPLE-NEXT: movq %rsi, %r15 ; CHECK-APPLE-NEXT: movq %rdi, %rbp @@ -1587,16 +1587,16 @@ define swiftcc { i64, i64, i64, i64} @params_and_return_in_reg(i64, i64, i64, i6 ; CHECK-APPLE-NEXT: movq %rbp, %rdi ; CHECK-APPLE-NEXT: movq %r15, %rsi ; CHECK-APPLE-NEXT: movq %r14, %rdx -; CHECK-APPLE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload +; CHECK-APPLE-NEXT: movq %rbx, %rcx ; CHECK-APPLE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload ; CHECK-APPLE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 ## 8-byte Reload -; CHECK-APPLE-NEXT: movq (%rsp), %r13 ## 8-byte Reload -; CHECK-APPLE-NEXT: movq %rbx, %r12 +; 
CHECK-APPLE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 ## 8-byte Reload +; CHECK-APPLE-NEXT: movq (%rsp), %r12 ## 8-byte Reload ; CHECK-APPLE-NEXT: callq _params_and_return_in_reg2 -; CHECK-APPLE-NEXT: movq %rax, %r14 -; CHECK-APPLE-NEXT: movq %rdx, %r15 -; CHECK-APPLE-NEXT: movq %rcx, %rbp -; CHECK-APPLE-NEXT: movq %r8, %rbx +; CHECK-APPLE-NEXT: movq %rax, %rbx +; CHECK-APPLE-NEXT: movq %rdx, %r14 +; CHECK-APPLE-NEXT: movq %rcx, %r15 +; CHECK-APPLE-NEXT: movq %r8, %rbp ; CHECK-APPLE-NEXT: movq %r12, (%rsp) ## 8-byte Spill ; CHECK-APPLE-NEXT: movl $1, %edi ; CHECK-APPLE-NEXT: movl $2, %esi @@ -1607,10 +1607,10 @@ define swiftcc { i64, i64, i64, i64} @params_and_return_in_reg(i64, i64, i64, i6 ; CHECK-APPLE-NEXT: xorl %r13d, %r13d ; CHECK-APPLE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload ; CHECK-APPLE-NEXT: callq _params_in_reg2 -; CHECK-APPLE-NEXT: movq %r14, %rax -; CHECK-APPLE-NEXT: movq %r15, %rdx -; CHECK-APPLE-NEXT: movq %rbp, %rcx -; CHECK-APPLE-NEXT: movq %rbx, %r8 +; CHECK-APPLE-NEXT: movq %rbx, %rax +; CHECK-APPLE-NEXT: movq %r14, %rdx +; CHECK-APPLE-NEXT: movq %r15, %rcx +; CHECK-APPLE-NEXT: movq %rbp, %r8 ; CHECK-APPLE-NEXT: movq (%rsp), %r12 ## 8-byte Reload ; CHECK-APPLE-NEXT: addq $48, %rsp ; CHECK-APPLE-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll index 8f46209689a1d..23c37af1db2f7 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll @@ -173,13 +173,14 @@ define <8 x i32> @PR46393(<8 x i16> %a0, i8 %a1) { define i64 @PR55050() { ; X86-LABEL: PR55050: ; X86: # %bb.0: # %entry +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: testb %al, %al +; X86-NEXT: testb %dl, %dl ; X86-NEXT: jne .LBB10_2 ; X86-NEXT: # %bb.1: # %if ; X86-NEXT: xorl %eax, %eax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: .LBB10_2: # %exit -; X86-NEXT: movl 
%eax, %edx ; X86-NEXT: retl ; ; X64-LABEL: PR55050: From d816c221b45c6e2553e1e9d461e743e46907cd8b Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Tue, 17 Oct 2023 09:32:22 -0700 Subject: [PATCH 359/720] [mlir][sparse] complete migration to dim2lvl/lvl2dim in library (#69268) This last revision completed the migration to non-permutation support in the SparseTensor library. All mappings are now controlled by the MapRef (forward and backward). Unused code has been removed, which simplifies subsequent testing of block sparsity. --- .../mlir/ExecutionEngine/SparseTensor/File.h | 7 +- .../ExecutionEngine/SparseTensor/Storage.h | 383 ++++-------------- .../ExecutionEngine/SparseTensorRuntime.cpp | 5 +- 3 files changed, 87 insertions(+), 308 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index efc3f82d6a307..1b5f0553a3af9 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -201,10 +201,11 @@ class SparseTensorReader final { const uint64_t *lvl2dim) { const uint64_t dimRank = getRank(); MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); - auto *coo = readCOO(map, lvlSizes); + auto *lvlCOO = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - dimRank, getDimSizes(), lvlRank, lvlTypes, dim2lvl, lvl2dim, *coo); - delete coo; + dimRank, getDimSizes(), lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, + *lvlCOO); + delete lvlCOO; return tensor; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index bafc9baa7edde..f1aeb12c662fd 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -10,8 +10,6 @@ // // * `SparseTensorStorageBase` // * `SparseTensorStorage` -// * `SparseTensorEnumeratorBase` -// * 
`SparseTensorEnumerator` // //===----------------------------------------------------------------------===// @@ -28,26 +26,15 @@ namespace mlir { namespace sparse_tensor { -/// The type of callback functions which receive an element. -template -using ElementConsumer = - const std::function &, V)> &; - -// Forward references. -template -class SparseTensorEnumeratorBase; -template -class SparseTensorEnumerator; - //===----------------------------------------------------------------------===// // -// SparseTensorStorage +// SparseTensorStorage Classes // //===----------------------------------------------------------------------===// /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects -/// of the tensor (e.g., shape, sparsity, mapping). In addition, +/// of the tensor (e.g., sizes, sparsity, mapping). In addition, /// we use function overloading to implement "partial" method /// specialization, which the C-API relies on to catch type errors /// arising from our use of opaque pointers. @@ -55,7 +42,7 @@ class SparseTensorEnumerator; /// Because this class forms a bridge between the denotational semantics /// of "tensors" and the operational semantics of how we store and /// compute with them, it also distinguishes between two different -/// coordinate spaces (and their associated rank, shape, sizes, etc). +/// coordinate spaces (and their associated rank, sizes, etc). /// Denotationally, we have the *dimensions* of the tensor represented /// by this object. Operationally, we have the *levels* of the storage /// representation itself. @@ -139,10 +126,6 @@ class SparseTensorStorageBase { /// Safely checks if the level is unique. bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } - /// Gets the level-to-dimension mapping. - // TODO: REMOVE THIS - const std::vector &getLvl2Dim() const { return lvl2dimVec; } - /// Gets positions-overhead storage for the given level. 
#define DECL_GETPOSITIONS(PNAME, P) \ virtual void getPositions(std::vector

**, uint64_t); @@ -154,6 +137,7 @@ class SparseTensorStorageBase { virtual void getCoordinates(std::vector **, uint64_t); MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATES) #undef DECL_GETCOORDINATES + /// Gets the coordinate-value stored at the given level and position. virtual uint64_t getCrd(uint64_t lvl, uint64_t pos) const = 0; @@ -220,8 +204,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const uint64_t *lvl2dim) : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim), - positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank), lvlCOO() { - } + positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank), coo() {} public: /// Constructs a sparse tensor with the given encoding, and allocates @@ -234,24 +217,16 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const uint64_t *lvlSizes, const DimLevelType *lvlTypes, const uint64_t *dim2lvl, - const uint64_t *lvl2dim, SparseTensorCOO *coo, + const uint64_t *lvl2dim, SparseTensorCOO *lvlCOO, bool initializeValuesIfAllDense); /// Constructs a sparse tensor with the given encoding, and initializes /// the contents from the COO. This ctor performs the same heuristic /// overhead-storage allocation as the ctor above. SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, - uint64_t lvlRank, const DimLevelType *lvlTypes, - const uint64_t *dim2lvl, const uint64_t *lvl2dim, - SparseTensorCOO &lvlCOO); - - /// Constructs a sparse tensor with the given encoding, and initializes - /// the contents from the enumerator. This ctor allocates exactly - /// the required amount of overhead storage, not using any heuristics. 
- SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, - uint64_t lvlRank, const DimLevelType *lvlTypes, - const uint64_t *dim2lvl, const uint64_t *lvl2dim, - SparseTensorEnumeratorBase &lvlEnumerator); + uint64_t lvlRank, const uint64_t *lvlSizes, + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, SparseTensorCOO &lvlCOO); /// Constructs a sparse tensor with the given encoding, and initializes /// the contents from the level buffers. This ctor allocates exactly @@ -265,39 +240,27 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions - /// are as per the `SparseTensorStorageBase` ctor; which is to say, - /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", - /// since there's nowhere to reconstruct dynamic sizes from. + /// Allocates a new empty sparse tensor. static SparseTensorStorage * newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const uint64_t *lvlSizes, const DimLevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim, bool forwarding); /// Allocates a new sparse tensor and initializes it from the given COO. - /// The preconditions are as per the `SparseTensorStorageBase` ctor - /// (where we define `lvlSizes = lvlCOO.getDimSizes().data()`), but - /// using the following assertions in lieu of the base ctor's assertions: - // - // TODO: The ability to reconstruct dynamic dimensions-sizes does not - // easily generalize to arbitrary `lvl2dim` mappings. When compiling - // MLIR programs to use this library, we should be able to generate - // code for effectively computing the reconstruction, but it's not clear - // that there's a feasible way to do so from within the library itself. 
- // Therefore, when we functionalize the `lvl2dim` mapping we'll have - // to update the type/preconditions of this factory too. static SparseTensorStorage * - newFromCOO(uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, - const DimLevelType *lvlTypes, const uint64_t *dim2lvl, - const uint64_t *lvl2dim, SparseTensorCOO &lvlCOO); + newFromCOO(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *dim2lvl, const uint64_t *lvl2dim, + SparseTensorCOO &lvlCOO); /// Allocates a new sparse tensor and initialize it with the data stored level /// buffers directly. - static SparseTensorStorage *packFromLvlBuffers( - uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, - const uint64_t *lvlSizes, const DimLevelType *lvlTypes, - const uint64_t *src2lvl, // FIXME: dim2lvl - const uint64_t *lvl2dim, uint64_t srcRank, const intptr_t *buffers); + static SparseTensorStorage * + packFromLvlBuffers(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const uint64_t *lvlSizes, + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, uint64_t srcRank, + const intptr_t *buffers); ~SparseTensorStorage() final = default; @@ -326,16 +289,14 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Partially specialize forwarding insertions based on template types. void forwardingInsert(const uint64_t *dimCoords, V val) final { - assert(dimCoords && lvlCOO); + assert(dimCoords && coo); map.pushforward(dimCoords, lvlCursor.data()); - lvlCOO->add(lvlCursor, val); + coo->add(lvlCursor, val); } /// Partially specialize lexicographical insertions based on template types. void lexInsert(const uint64_t *lvlCoords, V val) final { assert(lvlCoords); - // TODO: get rid of this! canonicalize all-dense "sparse" array into dense - // tensors. 
bool allDense = std::all_of(getLvlTypes().begin(), getLvlTypes().end(), [](DimLevelType lt) { return isDenseDLT(lt); }); if (allDense) { @@ -391,16 +352,17 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Finalizes forwarding insertions. void endForwardingInsert() final { - // Ensure lvlCOO is sorted. - assert(lvlCOO); - lvlCOO->sort(); + // Ensure COO is sorted. + assert(coo); + coo->sort(); // Now actually insert the `elements`. - const auto &elements = lvlCOO->getElements(); + const auto &elements = coo->getElements(); const uint64_t nse = elements.size(); assert(values.size() == 0); values.reserve(nse); fromCOO(elements, 0, nse, 0); - delete lvlCOO; + delete coo; + coo = nullptr; } /// Finalizes lexicographic insertions. @@ -411,23 +373,12 @@ class SparseTensorStorage final : public SparseTensorStorageBase { endPath(0); } - /// Allocates a new COO object and initializes it with the contents - /// of this tensor under the given mapping from the `getDimSizes()` - /// coordinate-space to the `trgSizes` coordinate-space. Callers must - /// make sure to delete the COO when they're done with it. - SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, - uint64_t srcRank, - const uint64_t *src2trg, // FIXME: dim2lvl - const uint64_t *lvl2dim) const { - // TODO: use MapRef here too for the translation - SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, - srcRank, src2trg); - auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); - enumerator.forallElements( - [&coo](const auto &trgCoords, V val) { coo->add(trgCoords, val); }); - // TODO: This assertion assumes there are no stored zeros, - // or if there are then that we don't filter them out. - // + /// Allocates a new COO object and initializes it with the contents. + /// Callers must make sure to delete the COO when they're done with it. 
+ SparseTensorCOO *toCOO() { + std::vector dimCoords(getDimRank()); + coo = new SparseTensorCOO(getDimSizes(), values.size()); + toCOO(0, 0, dimCoords); assert(coo->getElements().size() == values.size()); return coo; } @@ -525,27 +476,11 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Writes the given coordinate to `coordinates[lvl][pos]`. This method - /// checks that `crd` is representable in the `C` type; however, it - /// does not check that `crd` is semantically valid (i.e., in bounds - /// for `dimSizes[lvl]` and not elsewhere occurring in the same segment). - void writeCrd(uint64_t lvl, uint64_t pos, uint64_t crd) { - assert(isCompressedDLT(getLvlType(lvl)) || isSingletonDLT(getLvlType(lvl))); - // Subscript assignment to `std::vector` requires that the `pos`-th - // entry has been initialized; thus we must be sure to check `size()` - // here, instead of `capacity()` as would be ideal. - assert(pos < coordinates[lvl].size()); - coordinates[lvl][pos] = detail::checkOverflowCast(crd); - } - /// Computes the assembled-size associated with the `l`-th level, /// given the assembled-size associated with the `(l-1)`-th level. /// "Assembled-sizes" correspond to the (nominal) sizes of overhead /// storage, as opposed to "level-sizes" which are the cardinality /// of possible coordinates for that level. - /// - /// Precondition: the `positions[l]` array must be fully initialized - /// before calling this method. uint64_t assembledSize(uint64_t parentSz, uint64_t l) const { const auto dlt = getLvlType(l); // Avoid redundant bounds checking. if (isCompressedDLT(dlt)) @@ -553,7 +488,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { if (isSingletonDLT(dlt)) return parentSz; // New size is same as the parent. 
if (isDenseDLT(dlt)) - return parentSz * getLvlSizes()[l]; + return parentSz * getLvlSize(l); MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n", static_cast(dlt)); } @@ -561,11 +496,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Initializes sparse tensor storage scheme from a memory-resident sparse /// tensor in coordinate scheme. This method prepares the positions and /// coordinates arrays under the given per-level dense/sparse annotations. - /// - /// Preconditions: - /// * the `lvlElements` must be lexicographically sorted. - /// * the coordinates of every element are valid for `getLvlSizes()` - /// (i.e., equal rank and pointwise less-than). void fromCOO(const std::vector> &lvlElements, uint64_t lo, uint64_t hi, uint64_t l) { const uint64_t lvlRank = getLvlRank(); @@ -669,184 +599,48 @@ class SparseTensorStorage final : public SparseTensorStorageBase { return -1u; } - // Allow `SparseTensorEnumerator` to access the data-members (to avoid - // the cost of virtual-function dispatch in inner loops), without - // making them public to other client code. - friend class SparseTensorEnumerator; - - std::vector> positions; - std::vector> coordinates; - std::vector values; - std::vector lvlCursor; // cursor for lexicographic insertion. - SparseTensorCOO *lvlCOO; // COO used during forwarding -}; - -//===----------------------------------------------------------------------===// -// -// SparseTensorEnumerator -// -//===----------------------------------------------------------------------===// - -/// A (higher-order) function object for enumerating the elements of some -/// `SparseTensorStorage` under a permutation. That is, the `forallElements` -/// method encapsulates the loop-nest for enumerating the elements of -/// the source tensor (in whatever order is best for the source tensor), -/// and applies a permutation to the coordinates before handing -/// each element to the callback. 
A single enumerator object can be -/// freely reused for several calls to `forallElements`, just so long -/// as each call is sequential with respect to one another. -/// -/// N.B., this class stores a reference to the `SparseTensorStorageBase` -/// passed to the constructor; thus, objects of this class must not -/// outlive the sparse tensor they depend on. -/// -/// Design Note: The reason we define this class instead of simply using -/// `SparseTensorEnumerator` is because we need to hide/generalize -/// the `` template parameters from MLIR client code (to simplify the -/// type parameters used for direct sparse-to-sparse conversion). And the -/// reason we define the `SparseTensorEnumerator` subclasses rather -/// than simply using this class, is to avoid the cost of virtual-method -/// dispatch within the loop-nest. -template -class SparseTensorEnumeratorBase { -public: - /// Constructs an enumerator which automatically applies the given - /// mapping from the source tensor's dimensions to the desired - /// target tensor dimensions. - /// - /// Preconditions: - /// * the `src` must have the same `V` value type. - /// * `trgSizes` must be valid for `trgRank`. - /// * `src2trg` must be valid for `srcRank`, and must map coordinates - /// valid for `src.getDimSizes()` to coordinates valid for `trgSizes`. - /// - /// Asserts: - /// * `trgSizes` must be nonnull and must contain only nonzero sizes. - /// * `srcRank == src.getDimRank()`. - /// * `src2trg` must be nonnull. 
- SparseTensorEnumeratorBase(const SparseTensorStorageBase &src, - uint64_t trgRank, const uint64_t *trgSizes, - uint64_t srcRank, const uint64_t *src2trg) - : src(src), trgSizes(trgSizes, trgSizes + trgRank), - lvl2trg(src.getLvlRank()), trgCursor(trgRank) { - assert(trgSizes && "Received nullptr for target-sizes"); - assert(src2trg && "Received nullptr for source-to-target mapping"); - assert(srcRank == src.getDimRank() && "Source-rank mismatch"); - for (uint64_t t = 0; t < trgRank; ++t) - assert(trgSizes[t] > 0 && "Target-size zero has trivial storage"); - const auto &lvl2src = src.getLvl2Dim(); - for (uint64_t lvlRank = src.getLvlRank(), l = 0; l < lvlRank; ++l) - lvl2trg[l] = src2trg[lvl2src[l]]; - } - - virtual ~SparseTensorEnumeratorBase() = default; - - // We disallow copying to help avoid leaking the `src` reference. - // (In addition to avoiding the problem of slicing.) - SparseTensorEnumeratorBase(const SparseTensorEnumeratorBase &) = delete; - SparseTensorEnumeratorBase & - operator=(const SparseTensorEnumeratorBase &) = delete; - - /// Gets the source's dimension-rank. - uint64_t getSrcDimRank() const { return src.getDimRank(); } - - /// Gets the target's dimension-/level-rank. (This is usually - /// "dimension-rank", though that may coincide with "level-rank" - /// depending on usage.) - uint64_t getTrgRank() const { return trgSizes.size(); } - - /// Gets the target's dimension-/level-sizes. (These are usually - /// "dimensions", though that may coincide with "level-rank" depending - /// on usage.) - const std::vector &getTrgSizes() const { return trgSizes; } - - /// Enumerates all elements of the source tensor, permutes their - /// coordinates, and passes the permuted element to the callback. - /// The callback must not store the cursor reference directly, - /// since this function reuses the storage. Instead, the callback - /// must copy it if they want to keep it. 
- virtual void forallElements(ElementConsumer yield) = 0; - -protected: - const SparseTensorStorageBase &src; - std::vector trgSizes; // in target order. - std::vector lvl2trg; // source-levels -> target-dims/lvls. - std::vector trgCursor; // in target order. -}; - -template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { - using Base = SparseTensorEnumeratorBase; - using StorageImpl = SparseTensorStorage; - -public: - /// Constructs an enumerator which automatically applies the given - /// mapping from the source tensor's dimensions to the desired - /// target tensor dimensions. - /// - /// Preconditions/assertions are as per the `SparseTensorEnumeratorBase` ctor. - SparseTensorEnumerator(const StorageImpl &src, uint64_t trgRank, - const uint64_t *trgSizes, uint64_t srcRank, - const uint64_t *src2trg) - : Base(src, trgRank, trgSizes, srcRank, src2trg) {} - - ~SparseTensorEnumerator() final = default; - - void forallElements(ElementConsumer yield) final { - forallElements(yield, 0, 0); - } - -private: - // TODO: Once we functionalize the mappings, then we'll no longer - // be able to use the current approach of constructing `lvl2trg` in the - // ctor and using it to incrementally fill the `trgCursor` cursor as we - // recurse through `forallElements`. Instead we'll want to incrementally - // fill a `lvlCursor` as we recurse, and then use `src.getLvl2Dim()` - // and `src2trg` to convert it just before yielding to the callback. - // It's probably most efficient to just store the `srcCursor` and - // `trgCursor` buffers in this object, but we may want to benchmark - // that against using `std::calloc` to stack-allocate them instead. - // - /// The recursive component of the public `forallElements`. - void forallElements(ElementConsumer yield, uint64_t parentPos, - uint64_t l) { - // Recover the `` type parameters of `src`. 
- const auto &src = static_cast(this->src); - if (l == src.getLvlRank()) { - assert(parentPos < src.values.size()); - // TODO: - yield(this->trgCursor, src.values[parentPos]); + // Performs forall on level entries and inserts into dim COO. + void toCOO(uint64_t parentPos, uint64_t l, std::vector &dimCoords) { + if (l == getLvlRank()) { + map.pushbackward(lvlCursor.data(), dimCoords.data()); + assert(coo); + assert(parentPos < values.size()); + coo->add(dimCoords, values[parentPos]); return; } - uint64_t &cursorL = this->trgCursor[this->lvl2trg[l]]; - const auto dlt = src.getLvlType(l); // Avoid redundant bounds checking. - if (isCompressedDLT(dlt)) { + if (isCompressedLvl(l)) { // Look up the bounds of the `l`-level segment determined by the // `(l - 1)`-level position `parentPos`. - const std::vector

&positionsL = src.positions[l]; + const std::vector

&positionsL = positions[l]; assert(parentPos + 1 < positionsL.size()); const uint64_t pstart = static_cast(positionsL[parentPos]); const uint64_t pstop = static_cast(positionsL[parentPos + 1]); // Loop-invariant code for looking up the `l`-level coordinates. - const std::vector &coordinatesL = src.coordinates[l]; + const std::vector &coordinatesL = coordinates[l]; assert(pstop <= coordinatesL.size()); for (uint64_t pos = pstart; pos < pstop; ++pos) { - cursorL = static_cast(coordinatesL[pos]); - forallElements(yield, pos, l + 1); + lvlCursor[l] = static_cast(coordinatesL[pos]); + toCOO(pos, l + 1, dimCoords); } - } else if (isSingletonDLT(dlt)) { - cursorL = src.getCrd(l, parentPos); - forallElements(yield, parentPos, l + 1); + } else if (isSingletonLvl(l)) { + lvlCursor[l] = getCrd(l, parentPos); + toCOO(parentPos, l + 1, dimCoords); } else { // Dense level. - assert(isDenseDLT(dlt)); - const uint64_t sz = src.getLvlSizes()[l]; + assert(isDenseLvl(l)); + const uint64_t sz = getLvlSizes()[l]; const uint64_t pstart = parentPos * sz; for (uint64_t c = 0; c < sz; ++c) { - cursorL = c; - forallElements(yield, pstart + c, l + 1); + lvlCursor[l] = c; + toCOO(pstart + c, l + 1, dimCoords); } } } + + std::vector> positions; + std::vector> coordinates; + std::vector values; + std::vector lvlCursor; + SparseTensorCOO *coo; }; //===----------------------------------------------------------------------===// @@ -868,41 +662,24 @@ SparseTensorStorage *SparseTensorStorage::newEmpty( !forwarding); } -// TODO: MapRef template SparseTensorStorage *SparseTensorStorage::newFromCOO( - uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, - const DimLevelType *lvlTypes, const uint64_t *dim2lvl, - const uint64_t *lvl2dim, SparseTensorCOO &lvlCOO) { - assert(dimShape && dim2lvl && lvl2dim); - const auto &lvlSizes = lvlCOO.getDimSizes(); - assert(lvlRank == lvlSizes.size() && "Level-rank mismatch"); - // Must reconstruct `dimSizes` from `lvlSizes`. 
While this is easy - // enough to do when `lvl2dim` is a permutation, this approach will - // not work for more general mappings; so we will need to move this - // computation off to codegen. - std::vector dimSizes(dimRank); - for (uint64_t l = 0; l < lvlRank; ++l) { - const uint64_t d = lvl2dim[l]; - assert((dimShape[d] == 0 || dimShape[d] == lvlSizes[l]) && - "Dimension sizes do not match expected shape"); - dimSizes[d] = lvlSizes[l]; - } - return new SparseTensorStorage(dimRank, dimSizes.data(), lvlRank, + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *dim2lvl, const uint64_t *lvl2dim, + SparseTensorCOO &lvlCOO) { + return new SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, lvlCOO); } template SparseTensorStorage *SparseTensorStorage::packFromLvlBuffers( - uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const uint64_t *lvlSizes, const DimLevelType *lvlTypes, - const uint64_t *src2lvl, // FIXME: dim2lvl - const uint64_t *lvl2dim, uint64_t srcRank, const intptr_t *buffers) { - assert(dimShape && "Got nullptr for dimension shape"); - auto *tensor = - new SparseTensorStorage(dimRank, dimShape, lvlRank, lvlSizes, - lvlTypes, src2lvl, lvl2dim, buffers); - return tensor; + const uint64_t *dim2lvl, const uint64_t *lvl2dim, uint64_t srcRank, + const intptr_t *buffers) { + return new SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, + lvlTypes, dim2lvl, lvl2dim, buffers); } //===----------------------------------------------------------------------===// @@ -915,11 +692,12 @@ template SparseTensorStorage::SparseTensorStorage( uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const uint64_t *lvlSizes, const DimLevelType *lvlTypes, - const uint64_t *dim2lvl, const uint64_t *lvl2dim, SparseTensorCOO *coo, - bool initializeValuesIfAllDense) + const 
uint64_t *dim2lvl, const uint64_t *lvl2dim, + SparseTensorCOO *lvlCOO, bool initializeValuesIfAllDense) : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim) { - lvlCOO = coo; + assert(!lvlCOO || lvlRank == lvlCOO->getRank()); + coo = lvlCOO; // Provide hints on capacity of positions and coordinates. // TODO: needs much fine-tuning based on actual sparsity; currently // we reserve position/coordinate space based on all previous dense @@ -948,17 +726,16 @@ SparseTensorStorage::SparseTensorStorage( values.resize(sz, 0); } -// TODO: share more code with forwarding methods? template SparseTensorStorage::SparseTensorStorage( // NOLINT uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, - const DimLevelType *lvlTypes, const uint64_t *dim2lvl, - const uint64_t *lvl2dim, SparseTensorCOO &lvlCOO) - : SparseTensorStorage(dimRank, dimSizes, lvlRank, - lvlCOO.getDimSizes().data(), lvlTypes, dim2lvl, - lvl2dim, nullptr, false) { + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *dim2lvl, const uint64_t *lvl2dim, + SparseTensorCOO &lvlCOO) + : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, + dim2lvl, lvl2dim, nullptr, false) { // Ensure lvlCOO is sorted. - assert(lvlRank == lvlCOO.getDimSizes().size() && "Level-rank mismatch"); + assert(lvlRank == lvlCOO.getRank()); lvlCOO.sort(); // Now actually insert the `elements`. 
const auto &elements = lvlCOO.getElements(); diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 6a4c0f292c5f8..36d888a08de6d 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -129,7 +129,8 @@ extern "C" { assert(ptr && "Received nullptr for SparseTensorCOO object"); \ auto &coo = *static_cast *>(ptr); \ return SparseTensorStorage::newFromCOO( \ - dimRank, dimSizes, lvlRank, lvlTypes, dim2lvl, lvl2dim, coo); \ + dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, \ + coo); \ } \ case Action::kFromReader: { \ assert(ptr && "Received nullptr for SparseTensorReader object"); \ @@ -140,7 +141,7 @@ extern "C" { case Action::kToCOO: { \ assert(ptr && "Received nullptr for SparseTensorStorage object"); \ auto &tensor = *static_cast *>(ptr); \ - return tensor.toCOO(lvlRank, lvlSizes, dimRank, dim2lvl, lvl2dim); \ + return tensor.toCOO(); \ } \ case Action::kPack: { \ assert(ptr && "Received nullptr for SparseTensorStorage object"); \ From 2a40ec2d3e4d2af0222156022256cdee1ae6bb56 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 17 Oct 2023 17:49:59 +0100 Subject: [PATCH 360/720] [DAG] SimplifyDemandedBits - fix isOperationLegal typo in D146121 We need to check that the simplified ISD::SRL node is legal, not the old one Noticed while trying to isolate the regressions in D155472 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c0e88051dc427..8b4f315949912 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1897,7 +1897,7 @@ bool TargetLowering::SimplifyDemandedBits( if (isNarrowingProfitable(VT, HalfVT) && isTypeDesirableForOp(ISD::SRL, HalfVT) && isTruncateFree(VT, HalfVT) && 
isZExtFree(HalfVT, VT) && - (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) && + (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) && ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) || TLO.DAG.MaskedValueIsZero(Op0, HiBits))) { SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0); From 4480e650b3cf7cc63cfd3767cd6b120f8bfad2ac Mon Sep 17 00:00:00 2001 From: akirchhoff-modular <29713761+akirchhoff-modular@users.noreply.github.com> Date: Tue, 17 Oct 2023 10:28:14 -0700 Subject: [PATCH 361/720] [YAMLParser] Improve plain scalar spec compliance (#68946) The `YAMLParser.h` header file claims support for YAML 1.2 with a few deviations, but our plain scalar parsing failed to parse some valid YAML according to the spec. This change puts us more in compliance with the YAML spec, now letting us parse plain scalars containing additional special characters in cases where they are not ambiguous. --- llvm/lib/Support/YAMLParser.cpp | 71 ++++++++++++------- .../Generic/first-character-parse-error.mir | 4 +- llvm/test/YAMLParser/plain-characters.test | 30 ++++++++ llvm/unittests/Support/YAMLIOTest.cpp | 2 +- llvm/unittests/Support/YAMLParserTest.cpp | 61 +++++++++++++++- 5 files changed, 137 insertions(+), 31 deletions(-) create mode 100644 llvm/test/YAMLParser/plain-characters.test diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp index 6ac2c6aeeb46a..1422e40f91944 100644 --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -392,6 +392,10 @@ class Scanner { /// Pos is whitespace or a new line bool isBlankOrBreak(StringRef::iterator Position); + /// Return true if the minimal well-formed code unit subsequence at + /// Pos is considered a "safe" character for plain scalars. + bool isPlainSafeNonBlank(StringRef::iterator Position); + /// Return true if the line is a line break, false otherwise. 
bool isLineEmpty(StringRef Line); @@ -545,6 +549,10 @@ class Scanner { /// Can the next token be the start of a simple key? bool IsSimpleKeyAllowed; + /// Can the next token be a value indicator even if it does not have a + /// trailing space? + bool IsAdjacentValueAllowedInFlow; + /// True if an error has occurred. bool Failed; @@ -868,6 +876,7 @@ void Scanner::init(MemoryBufferRef Buffer) { FlowLevel = 0; IsStartOfStream = true; IsSimpleKeyAllowed = true; + IsAdjacentValueAllowedInFlow = false; Failed = false; std::unique_ptr InputBufferOwner = MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false); @@ -1049,6 +1058,15 @@ bool Scanner::isBlankOrBreak(StringRef::iterator Position) { *Position == '\n'; } +bool Scanner::isPlainSafeNonBlank(StringRef::iterator Position) { + if (Position == End || isBlankOrBreak(Position)) + return false; + if (FlowLevel && + StringRef(Position, 1).find_first_of(",[]{}") != StringRef::npos) + return false; + return true; +} + bool Scanner::isLineEmpty(StringRef Line) { for (const auto *Position = Line.begin(); Position != Line.end(); ++Position) if (!isBlankOrBreak(Position)) @@ -1189,6 +1207,7 @@ bool Scanner::scanStreamEnd() { unrollIndent(-1); SimpleKeys.clear(); IsSimpleKeyAllowed = false; + IsAdjacentValueAllowedInFlow = false; Token T; T.Kind = Token::TK_StreamEnd; @@ -1202,6 +1221,7 @@ bool Scanner::scanDirective() { unrollIndent(-1); SimpleKeys.clear(); IsSimpleKeyAllowed = false; + IsAdjacentValueAllowedInFlow = false; StringRef::iterator Start = Current; consume('%'); @@ -1233,6 +1253,7 @@ bool Scanner::scanDocumentIndicator(bool IsStart) { unrollIndent(-1); SimpleKeys.clear(); IsSimpleKeyAllowed = false; + IsAdjacentValueAllowedInFlow = false; Token T; T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; @@ -1255,6 +1276,8 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) { // And may also be followed by a simple key. 
IsSimpleKeyAllowed = true; + // Adjacent values are allowed in flows only after JSON-style keys. + IsAdjacentValueAllowedInFlow = false; ++FlowLevel; return true; } @@ -1262,6 +1285,7 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) { bool Scanner::scanFlowCollectionEnd(bool IsSequence) { removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); IsSimpleKeyAllowed = false; + IsAdjacentValueAllowedInFlow = true; Token T; T.Kind = IsSequence ? Token::TK_FlowSequenceEnd : Token::TK_FlowMappingEnd; @@ -1276,6 +1300,7 @@ bool Scanner::scanFlowCollectionEnd(bool IsSequence) { bool Scanner::scanFlowEntry() { removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); IsSimpleKeyAllowed = true; + IsAdjacentValueAllowedInFlow = false; Token T; T.Kind = Token::TK_FlowEntry; T.Range = StringRef(Current, 1); @@ -1288,6 +1313,7 @@ bool Scanner::scanBlockEntry() { rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); IsSimpleKeyAllowed = true; + IsAdjacentValueAllowedInFlow = false; Token T; T.Kind = Token::TK_BlockEntry; T.Range = StringRef(Current, 1); @@ -1302,6 +1328,7 @@ bool Scanner::scanKey() { removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); IsSimpleKeyAllowed = !FlowLevel; + IsAdjacentValueAllowedInFlow = false; Token T; T.Kind = Token::TK_Key; @@ -1339,6 +1366,7 @@ bool Scanner::scanValue() { rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); IsSimpleKeyAllowed = !FlowLevel; } + IsAdjacentValueAllowedInFlow = false; Token T; T.Kind = Token::TK_Value; @@ -1420,6 +1448,7 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; + IsAdjacentValueAllowedInFlow = true; return true; } @@ -1434,21 +1463,9 @@ bool Scanner::scanPlainScalar() { if (*Current == '#') break; - while (Current != End && !isBlankOrBreak(Current)) { - if (FlowLevel && *Current == ':' && - (Current + 1 == End || - !(isBlankOrBreak(Current + 1) || 
*(Current + 1) == ','))) { - setError("Found unexpected ':' while scanning a plain scalar", Current); - return false; - } - - // Check for the end of the plain scalar. - if ( (*Current == ':' && isBlankOrBreak(Current + 1)) - || ( FlowLevel - && (StringRef(Current, 1).find_first_of(",:?[]{}") - != StringRef::npos))) - break; - + while (Current != End && + ((*Current != ':' && isPlainSafeNonBlank(Current)) || + (*Current == ':' && isPlainSafeNonBlank(Current + 1)))) { StringRef::iterator i = skip_nb_char(Current); if (i == Current) break; @@ -1499,6 +1516,7 @@ bool Scanner::scanPlainScalar() { saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; + IsAdjacentValueAllowedInFlow = false; return true; } @@ -1534,6 +1552,7 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) { saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; + IsAdjacentValueAllowedInFlow = false; return true; } @@ -1766,6 +1785,7 @@ bool Scanner::scanBlockScalar(bool IsLiteral) { // New lines may start a simple key. if (!FlowLevel) IsSimpleKeyAllowed = true; + IsAdjacentValueAllowedInFlow = false; Token T; T.Kind = Token::TK_BlockScalar; @@ -1799,6 +1819,7 @@ bool Scanner::scanTag() { saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; + IsAdjacentValueAllowedInFlow = false; return true; } @@ -1848,13 +1869,14 @@ bool Scanner::fetchMoreTokens() { if (*Current == ',') return scanFlowEntry(); - if (*Current == '-' && isBlankOrBreak(Current + 1)) + if (*Current == '-' && (isBlankOrBreak(Current + 1) || Current + 1 == End)) return scanBlockEntry(); - if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) + if (*Current == '?' 
&& (Current + 1 == End || isBlankOrBreak(Current + 1))) return scanKey(); - if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) + if (*Current == ':' && + (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow)) return scanValue(); if (*Current == '*') @@ -1880,15 +1902,10 @@ bool Scanner::fetchMoreTokens() { // Get a plain scalar. StringRef FirstChar(Current, 1); - if (!(isBlankOrBreak(Current) - || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) - || (*Current == '-' && !isBlankOrBreak(Current + 1)) - || (!FlowLevel && (*Current == '?' || *Current == ':') - && isBlankOrBreak(Current + 1)) - || (!FlowLevel && *Current == ':' - && Current + 2 < End - && *(Current + 1) == ':' - && !isBlankOrBreak(Current + 2))) + if ((!isBlankOrBreak(Current) && + FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") == StringRef::npos) || + (FirstChar.find_first_of("?:-") != StringRef::npos && + isPlainSafeNonBlank(Current + 1))) return scanPlainScalar(); setError("Unrecognized character while tokenizing.", Current); diff --git a/llvm/test/CodeGen/MIR/Generic/first-character-parse-error.mir b/llvm/test/CodeGen/MIR/Generic/first-character-parse-error.mir index 00a01058dc8cb..869392f3e4bb6 100644 --- a/llvm/test/CodeGen/MIR/Generic/first-character-parse-error.mir +++ b/llvm/test/CodeGen/MIR/Generic/first-character-parse-error.mir @@ -1,6 +1,6 @@ -:# RUN: not llc -run-pass=none %s -o - 2>&1 | FileCheck %s +@# RUN: not llc -run-pass=none %s -o - 2>&1 | FileCheck %s -# The : before the run comment is syntactically invalid. This used to +# The @ before the run comment is syntactically invalid. This used to # crash in the SourceMgr diagnostic printer because it was called # before the LLVMContext was initialized. 
diff --git a/llvm/test/YAMLParser/plain-characters.test b/llvm/test/YAMLParser/plain-characters.test new file mode 100644 index 0000000000000..f22016bcb9bca --- /dev/null +++ b/llvm/test/YAMLParser/plain-characters.test @@ -0,0 +1,30 @@ +# RUN: yaml-bench -canonical %s | FileCheck %s +# Example from https://yaml.org/spec/1.2.2/#example-plain-characters + +# Outside flow collection: +- ::vector +- ": - ()" +- Up, up, and away! +- -123 +- https://example.com/foo#bar +# Inside flow collection: +- [ ::vector, + ": - ()", + "Up, up and away!", + -123, + https://example.com/foo#bar ] + +# CHECK: !!seq [ +# CHECK-NEXT: !!str "::vector", +# CHECK-NEXT: !!str ": - ()", +# CHECK-NEXT: !!str "Up, up, and away!", +# CHECK-NEXT: !!str "-123", +# CHECK-NEXT: !!str "https://example.com/foo#bar", +# CHECK-NEXT: !!seq [ +# CHECK-NEXT: !!str "::vector", +# CHECK-NEXT: !!str ": - ()", +# CHECK-NEXT: !!str "Up, up and away!", +# CHECK-NEXT: !!str "-123", +# CHECK-NEXT: !!str "https://example.com/foo#bar", +# CHECK-NEXT: ], +# CHECK-NEXT: ] diff --git a/llvm/unittests/Support/YAMLIOTest.cpp b/llvm/unittests/Support/YAMLIOTest.cpp index 745d743b2b244..488746764ae65 100644 --- a/llvm/unittests/Support/YAMLIOTest.cpp +++ b/llvm/unittests/Support/YAMLIOTest.cpp @@ -3156,7 +3156,7 @@ TEST(YAMLIO, TestFlowSequenceTokenErrors) { TEST(YAMLIO, TestDirectiveMappingNoValue) { Input yin("%YAML\n{5:"); - EXPECT_FALSE(yin.setCurrentDocument()); + yin.setCurrentDocument(); EXPECT_TRUE(yin.error()); Input yin2("%TAG\n'\x98!< :\n"); diff --git a/llvm/unittests/Support/YAMLParserTest.cpp b/llvm/unittests/Support/YAMLParserTest.cpp index b52a3850c02b7..247e70756861d 100644 --- a/llvm/unittests/Support/YAMLParserTest.cpp +++ b/llvm/unittests/Support/YAMLParserTest.cpp @@ -47,6 +47,10 @@ TEST(YAMLParser, ParsesEmptyArray) { ExpectParseSuccess("Empty array", "[]"); } +TEST(YAMLParser, ParsesComplexMap) { + ExpectParseSuccess("Complex block map", "? 
a\n: b"); +} + TEST(YAMLParser, FailsIfNotClosingArray) { ExpectParseError("Not closing array", "["); ExpectParseError("Not closing array", " [ "); @@ -82,7 +86,10 @@ TEST(YAMLParser, FailsIfMissingColon) { } TEST(YAMLParser, FailsOnMissingQuote) { - ExpectParseError("Missing open quote", "[{a\":\"b\"}]"); + // Missing open quote counts as a plain scalar per YAML spec + // (Following is equivalent to JSON [{"a\":\"b\"": null}]) + ExpectParseSuccess("Missing open quote", "[{a\":\"b\"}]"); + // Closing quote is more strict -- plain scalars cannot start with a quote ExpectParseError("Missing closing quote", "[{\"a\":\"b}]"); } @@ -128,6 +135,48 @@ TEST(YAMLParser, ParsesArrayOfArrays) { ExpectParseSuccess("Array of arrays", "[[]]"); } +TEST(YAMLParser, ParsesPlainScalars) { + ExpectParseSuccess("Plain scalar", "hello"); + ExpectParseSuccess("Plain scalar beginning with a question mark", "?hello"); + ExpectParseSuccess("Plain scalar beginning with a colon", ":hello"); + ExpectParseSuccess("Plain scalar beginning with two colons", "::hello"); + ExpectParseSuccess("Plain scalar beginning with a hyphen", "-hello"); + ExpectParseSuccess("Multi-line plain scalar", "Hello\nworld"); + ExpectParseSuccess("Plain scalar with indicator characters", + "He-!l*lo, []world{}"); + ExpectParseSuccess("Plain scalar with indicator characters used as block key", + "He-!l*lo, []world{}: value"); + ExpectParseSuccess("Plain scalar in flow sequence", "hello"); + ExpectParseSuccess( + "Plain scalar beginning with a question mark in flow sequence", + "[ ?hello ]"); + ExpectParseSuccess("Plain scalar beginning with a colon in flow sequence", + "[ :hello ]"); + ExpectParseSuccess("Plain scalar beginning with two colons in flow sequence", + "[ ::hello ]"); + ExpectParseSuccess("Plain scalar beginning with a hyphen in flow sequence", + "[ -hello ]"); + ExpectParseSuccess("Multi-line plain scalar in flow sequence", + "[ Hello\nworld ]"); + ExpectParseSuccess( + "Plain scalar with non-flow indicator 
characters in flow sequence", + "[ He-!l*lo, world ]"); + ExpectParseSuccess( + "Plain scalar with non-flow indicator characters used as flow key", + "{ He-!l*lo, world: value } "); + ExpectParseError( + "Plain scalar with flow indicator characters inside flow sequence", + "[ Hello[world ]"); + ExpectParseError( + "Plain scalar with flow indicator characters inside flow key", + "{ Hello[world: value }"); + // Multi-line plain scalar in keys is strictly invalid per the spec, but many + // implementations accept it in flow keys nonetheless. Block keys are not + // accepted by any other implementation I can find. + ExpectParseSuccess("Multi-line plain scalar in block key", "a\nb: c"); + ExpectParseSuccess("Multi-line plain scalar in flow key", "{\na\nb: c\n}"); +} + TEST(YAMLParser, ParsesBlockLiteralScalars) { ExpectParseSuccess("Block literal scalar", "test: |\n Hello\n World\n"); ExpectParseSuccess("Block literal scalar EOF", "test: |\n Hello\n World"); @@ -176,6 +225,10 @@ TEST(YAMLParser, HandlesEndOfFileGracefully) { ExpectParseError("In array hitting EOF", "[[] "); ExpectParseError("In array hitting EOF", "[[]"); ExpectParseError("In object hitting EOF", "{\"\""); + // This one is valid, equivalent to the JSON {"": null} + ExpectParseSuccess("In complex block map hitting EOF", "?"); + // Equivalent to JSON [null] + ExpectParseSuccess("In block sequence hitting EOF", "-"); } TEST(YAMLParser, HandlesNullValuesInKeyValueNodesGracefully) { @@ -183,6 +236,12 @@ TEST(YAMLParser, HandlesNullValuesInKeyValueNodesGracefully) { ExpectParseError("KeyValueNode with null value", "test: '"); } +TEST(YAMLParser, BlockSequenceEOF) { + SourceMgr SM; + yaml::Stream Stream("-", SM); + EXPECT_TRUE(isa_and_present(Stream.begin()->getRoot())); +} + // Checks that the given string can be parsed into an identical string inside // of an array. 
static void ExpectCanParseString(StringRef String) { From 658ed58de6f897a83270431bd645d1fa04395e04 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 17 Oct 2023 18:39:37 +0100 Subject: [PATCH 362/720] [AArch64] Add additional tests for fptosi/fptoui. NFC --- llvm/test/CodeGen/AArch64/fptoi.ll | 5518 ++++++++++++++++++++++++++++ 1 file changed, 5518 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/fptoi.ll diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll new file mode 100644 index 0000000000000..8fbb074136a90 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -0,0 +1,5518 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 +; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 + +; CHECK-GI: warning: Instruction selection used fallback path for fptos_v3f64_v3i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f64_v3i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f64_v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f64_v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f64_v8i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f64_v8i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
fptos_v16f64_v16i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f64_v16i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f64_v32i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f64_v32i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f64_v3i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f64_v3i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f64_v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f64_v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f64_v8i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f64_v8i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v16f64_v16i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f64_v16i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f64_v32i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f64_v32i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f64_v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f64_v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f64_v3i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f64_v3i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f64_v4i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f64_v4i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f64_v8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f64_v8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
fptos_v16f64_v16i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f64_v16i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f64_v32i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f64_v32i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f64_v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f64_v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f64_v3i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f64_v3i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f64_v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f64_v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f64_v8i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f64_v8i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v16f64_v16i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f64_v16i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f64_v32i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f64_v32i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f32_v3i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f32_v3i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f32_v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f32_v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f32_v8i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f32_v8i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
fptos_v16f32_v16i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f32_v16i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f32_v32i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f32_v32i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f32_v3i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f32_v3i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f32_v8i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f32_v8i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v16f32_v16i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f32_v16i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f32_v32i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f32_v32i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f32_v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f32_v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f32_v3i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f32_v3i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f32_v4i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f32_v4i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f32_v8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f32_v8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v16f32_v16i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f32_v16i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
fptos_v32f32_v32i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f32_v32i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f32_v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f32_v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f32_v3i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f32_v3i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f32_v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f32_v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f32_v8i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f32_v8i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v16f32_v16i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f32_v16i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f32_v32i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f32_v32i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f16_v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f16_v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f16_v3i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f16_v3i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f16_v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f16_v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f16_v8i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f16_v8i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v16f16_v16i64 
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f16_v16i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f16_v32i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f16_v32i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f16_v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f16_v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f16_v3i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f16_v3i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f16_v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f16_v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f16_v8i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f16_v8i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v16f16_v16i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f16_v16i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f16_v32i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f16_v32i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f16_v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f16_v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f16_v3i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f16_v3i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f16_v4i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f16_v4i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f16_v8i16 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f16_v8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v16f16_v16i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f16_v16i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f16_v32i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f16_v32i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f16_v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f16_v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f16_v3i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f16_v3i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v4f16_v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v4f16_v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v8f16_v8i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v8f16_v8i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v16f16_v16i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v16f16_v16i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v32f16_v32i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v32f16_v32i8 + +define i64 @fptos_f64_i64(double %a) { +; CHECK-LABEL: fptos_f64_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs x0, d0 +; CHECK-NEXT: ret +entry: + %c = fptosi double %a to i64 + ret i64 %c +} + +define i64 @fptou_f64_i64(double %a) { +; CHECK-LABEL: fptou_f64_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu x0, d0 +; CHECK-NEXT: ret +entry: + %c = fptoui double %a to i64 + ret i64 %c +} + +define i32 @fptos_f64_i32(double %a) { +; 
CHECK-LABEL: fptos_f64_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: ret +entry: + %c = fptosi double %a to i32 + ret i32 %c +} + +define i32 @fptou_f64_i32(double %a) { +; CHECK-LABEL: fptou_f64_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: ret +entry: + %c = fptoui double %a to i32 + ret i32 %c +} + +define i16 @fptos_f64_i16(double %a) { +; CHECK-LABEL: fptos_f64_i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: ret +entry: + %c = fptosi double %a to i16 + ret i16 %c +} + +define i16 @fptou_f64_i16(double %a) { +; CHECK-SD-LABEL: fptou_f64_i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcvtzs w0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f64_i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret +entry: + %c = fptoui double %a to i16 + ret i16 %c +} + +define i8 @fptos_f64_i8(double %a) { +; CHECK-LABEL: fptos_f64_i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: ret +entry: + %c = fptosi double %a to i8 + ret i8 %c +} + +define i8 @fptou_f64_i8(double %a) { +; CHECK-SD-LABEL: fptou_f64_i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcvtzs w0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f64_i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret +entry: + %c = fptoui double %a to i8 + ret i8 %c +} + +define i64 @fptos_f32_i64(float %a) { +; CHECK-LABEL: fptos_f32_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs x0, s0 +; CHECK-NEXT: ret +entry: + %c = fptosi float %a to i64 + ret i64 %c +} + +define i64 @fptou_f32_i64(float %a) { +; CHECK-LABEL: fptou_f32_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu x0, s0 +; CHECK-NEXT: ret +entry: + %c = fptoui float %a to i64 + ret i64 %c +} + +define i32 @fptos_f32_i32(float %a) { +; CHECK-LABEL: fptos_f32_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w0, s0 +; 
CHECK-NEXT: ret +entry: + %c = fptosi float %a to i32 + ret i32 %c +} + +define i32 @fptou_f32_i32(float %a) { +; CHECK-LABEL: fptou_f32_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: ret +entry: + %c = fptoui float %a to i32 + ret i32 %c +} + +define i16 @fptos_f32_i16(float %a) { +; CHECK-LABEL: fptos_f32_i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ret +entry: + %c = fptosi float %a to i16 + ret i16 %c +} + +define i16 @fptou_f32_i16(float %a) { +; CHECK-SD-LABEL: fptou_f32_i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcvtzs w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f32_i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret +entry: + %c = fptoui float %a to i16 + ret i16 %c +} + +define i8 @fptos_f32_i8(float %a) { +; CHECK-LABEL: fptos_f32_i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ret +entry: + %c = fptosi float %a to i8 + ret i8 %c +} + +define i8 @fptou_f32_i8(float %a) { +; CHECK-SD-LABEL: fptou_f32_i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcvtzs w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f32_i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret +entry: + %c = fptoui float %a to i8 + ret i8 %c +} + +define i64 @fptos_f16_i64(half %a) { +; CHECK-SD-NOFP16-LABEL: fptos_f16_i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvtzs x0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_f16_i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs x0, h0 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-LABEL: fptos_f16_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret +entry: + %c = fptosi half %a to i64 + ret i64 %c +} + +define i64 @fptou_f16_i64(half %a) { +; CHECK-SD-NOFP16-LABEL: 
fptou_f16_i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvtzu x0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_f16_i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzu x0, h0 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f16_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret +entry: + %c = fptoui half %a to i64 + ret i64 %c +} + +define i32 @fptos_f16_i32(half %a) { +; CHECK-SD-NOFP16-LABEL: fptos_f16_i32: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_f16_i32: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-LABEL: fptos_f16_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret +entry: + %c = fptosi half %a to i32 + ret i32 %c +} + +define i32 @fptou_f16_i32(half %a) { +; CHECK-SD-NOFP16-LABEL: fptou_f16_i32: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvtzu w0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_f16_i32: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzu w0, h0 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f16_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret +entry: + %c = fptoui half %a to i32 + ret i32 %c +} + +define i16 @fptos_f16_i16(half %a) { +; CHECK-SD-NOFP16-LABEL: fptos_f16_i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_f16_i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: 
fcvtzs w0, h0 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-LABEL: fptos_f16_i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret +entry: + %c = fptosi half %a to i16 + ret i16 %c +} + +define i16 @fptou_f16_i16(half %a) { +; CHECK-SD-NOFP16-LABEL: fptou_f16_i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_f16_i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f16_i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret +entry: + %c = fptoui half %a to i16 + ret i16 %c +} + +define i8 @fptos_f16_i8(half %a) { +; CHECK-SD-NOFP16-LABEL: fptos_f16_i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_f16_i8: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-LABEL: fptos_f16_i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret +entry: + %c = fptosi half %a to i8 + ret i8 %c +} + +define i8 @fptou_f16_i8(half %a) { +; CHECK-SD-NOFP16-LABEL: fptou_f16_i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_f16_i8: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f16_i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret +entry: + %c = fptoui half %a to i8 + ret i8 %c +} + +define <2 x i64> 
@fptos_v2f64_v2i64(<2 x double> %a) { +; CHECK-LABEL: fptos_v2f64_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x double> %a to <2 x i64> + ret <2 x i64> %c +} + +define <2 x i64> @fptou_v2f64_v2i64(<2 x double> %a) { +; CHECK-LABEL: fptou_v2f64_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x double> %a to <2 x i64> + ret <2 x i64> %c +} + +define <3 x i64> @fptos_v3f64_v3i64(<3 x double> %a) { +; CHECK-LABEL: fptos_v3f64_v3i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x double> %a to <3 x i64> + ret <3 x i64> %c +} + +define <3 x i64> @fptou_v3f64_v3i64(<3 x double> %a) { +; CHECK-LABEL: fptou_v3f64_v3i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x double> %a to <3 x i64> + ret <3 x i64> %c +} + +define <4 x i64> @fptos_v4f64_v4i64(<4 x double> %a) { +; CHECK-LABEL: fptos_v4f64_v4i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2d, 
v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <4 x double> %a to <4 x i64> + ret <4 x i64> %c +} + +define <4 x i64> @fptou_v4f64_v4i64(<4 x double> %a) { +; CHECK-LABEL: fptou_v4f64_v4i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <4 x double> %a to <4 x i64> + ret <4 x i64> %c +} + +define <8 x i64> @fptos_v8f64_v8i64(<8 x double> %a) { +; CHECK-LABEL: fptos_v8f64_v8i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <8 x double> %a to <8 x i64> + ret <8 x i64> %c +} + +define <8 x i64> @fptou_v8f64_v8i64(<8 x double> %a) { +; CHECK-LABEL: fptou_v8f64_v8i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <8 x double> %a to <8 x i64> + ret <8 x i64> %c +} + +define <16 x i64> @fptos_v16f64_v16i64(<16 x double> %a) { +; CHECK-LABEL: fptos_v16f64_v16i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <16 x double> %a to <16 x i64> + ret <16 x i64> %c +} + +define <16 x i64> @fptou_v16f64_v16i64(<16 x double> %a) { +; CHECK-LABEL: fptou_v16f64_v16i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-NEXT: fcvtzu v4.2d, v4.2d +; CHECK-NEXT: fcvtzu v5.2d, v5.2d +; CHECK-NEXT: fcvtzu v6.2d, 
v6.2d +; CHECK-NEXT: fcvtzu v7.2d, v7.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <16 x double> %a to <16 x i64> + ret <16 x i64> %c +} + +define <32 x i64> @fptos_v32f64_v32i64(<32 x double> %a) { +; CHECK-LABEL: fptos_v32f64_v32i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp q17, q16, [sp, #96] +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: ldp q19, q18, [sp, #64] +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: ldp q21, q20, [sp, #32] +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v20.2d, v20.2d +; CHECK-NEXT: fcvtzs v21.2d, v21.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: stp q5, q6, [x8, #80] +; CHECK-NEXT: str q16, [x8, #240] +; CHECK-NEXT: ldp q22, q16, [sp] +; CHECK-NEXT: stp q3, q4, [x8, #48] +; CHECK-NEXT: stp q20, q19, [x8, #176] +; CHECK-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-NEXT: stp q1, q2, [x8, #16] +; CHECK-NEXT: stp q18, q17, [x8, #208] +; CHECK-NEXT: fcvtzs v17.2d, v22.2d +; CHECK-NEXT: str q0, [x8] +; CHECK-NEXT: stp q16, q21, [x8, #144] +; CHECK-NEXT: stp q7, q17, [x8, #112] +; CHECK-NEXT: ret +entry: + %c = fptosi <32 x double> %a to <32 x i64> + ret <32 x i64> %c +} + +define <32 x i64> @fptou_v32f64_v32i64(<32 x double> %a) { +; CHECK-LABEL: fptou_v32f64_v32i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp q17, q16, [sp, #96] +; CHECK-NEXT: fcvtzu v7.2d, v7.2d +; CHECK-NEXT: ldp q19, q18, [sp, #64] +; CHECK-NEXT: fcvtzu v6.2d, v6.2d +; CHECK-NEXT: ldp q21, q20, [sp, #32] +; CHECK-NEXT: fcvtzu v5.2d, v5.2d +; CHECK-NEXT: fcvtzu v16.2d, v16.2d +; CHECK-NEXT: fcvtzu v17.2d, v17.2d +; CHECK-NEXT: fcvtzu v4.2d, v4.2d +; CHECK-NEXT: fcvtzu v18.2d, v18.2d +; CHECK-NEXT: fcvtzu v19.2d, v19.2d +; CHECK-NEXT: fcvtzu v3.2d, v3.2d +; 
CHECK-NEXT: fcvtzu v20.2d, v20.2d +; CHECK-NEXT: fcvtzu v21.2d, v21.2d +; CHECK-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: stp q5, q6, [x8, #80] +; CHECK-NEXT: str q16, [x8, #240] +; CHECK-NEXT: ldp q22, q16, [sp] +; CHECK-NEXT: stp q3, q4, [x8, #48] +; CHECK-NEXT: stp q20, q19, [x8, #176] +; CHECK-NEXT: fcvtzu v16.2d, v16.2d +; CHECK-NEXT: stp q1, q2, [x8, #16] +; CHECK-NEXT: stp q18, q17, [x8, #208] +; CHECK-NEXT: fcvtzu v17.2d, v22.2d +; CHECK-NEXT: str q0, [x8] +; CHECK-NEXT: stp q16, q21, [x8, #144] +; CHECK-NEXT: stp q7, q17, [x8, #112] +; CHECK-NEXT: ret +entry: + %c = fptoui <32 x double> %a to <32 x i64> + ret <32 x i64> %c +} + +define <2 x i32> @fptos_v2f64_v2i32(<2 x double> %a) { +; CHECK-LABEL: fptos_v2f64_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x double> %a to <2 x i32> + ret <2 x i32> %c +} + +define <2 x i32> @fptou_v2f64_v2i32(<2 x double> %a) { +; CHECK-LABEL: fptou_v2f64_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x double> %a to <2 x i32> + ret <2 x i32> %c +} + +define <3 x i32> @fptos_v3f64_v3i32(<3 x double> %a) { +; CHECK-LABEL: fptos_v3f64_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x double> %a to <3 x i32> + ret <3 x i32> %c +} + +define <3 x i32> @fptou_v3f64_v3i32(<3 x double> %a) { +; CHECK-LABEL: fptou_v3f64_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 
killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x double> %a to <3 x i32> + ret <3 x i32> %c +} + +define <4 x i32> @fptos_v4f64_v4i32(<4 x double> %a) { +; CHECK-LABEL: fptos_v4f64_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <4 x double> %a to <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @fptou_v4f64_v4i32(<4 x double> %a) { +; CHECK-LABEL: fptou_v4f64_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <4 x double> %a to <4 x i32> + ret <4 x i32> %c +} + +define <8 x i32> @fptos_v8f64_v8i32(<8 x double> %a) { +; CHECK-LABEL: fptos_v8f64_v8i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <8 x double> %a to <8 x i32> + ret <8 x i32> %c +} + +define <8 x i32> @fptou_v8f64_v8i32(<8 x double> %a) { +; CHECK-LABEL: fptou_v8f64_v8i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <8 x double> %a to <8 x i32> + ret <8 x i32> %c +} + +define <16 x i32> @fptos_v16f64_v16i32(<16 x double> %a) { +; CHECK-LABEL: fptos_v16f64_v16i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; 
CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s +; CHECK-NEXT: uzp1 v2.4s, v4.4s, v5.4s +; CHECK-NEXT: uzp1 v3.4s, v6.4s, v7.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <16 x double> %a to <16 x i32> + ret <16 x i32> %c +} + +define <16 x i32> @fptou_v16f64_v16i32(<16 x double> %a) { +; CHECK-LABEL: fptou_v16f64_v16i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-NEXT: fcvtzu v5.2d, v5.2d +; CHECK-NEXT: fcvtzu v4.2d, v4.2d +; CHECK-NEXT: fcvtzu v7.2d, v7.2d +; CHECK-NEXT: fcvtzu v6.2d, v6.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s +; CHECK-NEXT: uzp1 v2.4s, v4.4s, v5.4s +; CHECK-NEXT: uzp1 v3.4s, v6.4s, v7.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <16 x double> %a to <16 x i32> + ret <16 x i32> %c +} + +define <32 x i32> @fptos_v32f64_v32i32(<32 x double> %a) { +; CHECK-LABEL: fptos_v32f64_v32i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp q16, q17, [sp, #96] +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: ldp q18, q19, [sp, #64] +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ldp q20, q21, [sp, #32] +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: ldp q22, q23, [sp] +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v21.2d, v21.2d +; CHECK-NEXT: fcvtzs v20.2d, v20.2d +; CHECK-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-NEXT: 
fcvtzs v16.2d, v16.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s +; CHECK-NEXT: uzp1 v2.4s, v4.4s, v5.4s +; CHECK-NEXT: uzp1 v3.4s, v6.4s, v7.4s +; CHECK-NEXT: uzp1 v5.4s, v20.4s, v21.4s +; CHECK-NEXT: uzp1 v4.4s, v22.4s, v23.4s +; CHECK-NEXT: uzp1 v6.4s, v18.4s, v19.4s +; CHECK-NEXT: uzp1 v7.4s, v16.4s, v17.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <32 x double> %a to <32 x i32> + ret <32 x i32> %c +} + +define <32 x i32> @fptou_v32f64_v32i32(<32 x double> %a) { +; CHECK-LABEL: fptou_v32f64_v32i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp q16, q17, [sp, #96] +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: ldp q18, q19, [sp, #64] +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ldp q20, q21, [sp, #32] +; CHECK-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-NEXT: ldp q22, q23, [sp] +; CHECK-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-NEXT: fcvtzu v5.2d, v5.2d +; CHECK-NEXT: fcvtzu v4.2d, v4.2d +; CHECK-NEXT: fcvtzu v7.2d, v7.2d +; CHECK-NEXT: fcvtzu v6.2d, v6.2d +; CHECK-NEXT: fcvtzu v21.2d, v21.2d +; CHECK-NEXT: fcvtzu v20.2d, v20.2d +; CHECK-NEXT: fcvtzu v23.2d, v23.2d +; CHECK-NEXT: fcvtzu v22.2d, v22.2d +; CHECK-NEXT: fcvtzu v19.2d, v19.2d +; CHECK-NEXT: fcvtzu v18.2d, v18.2d +; CHECK-NEXT: fcvtzu v17.2d, v17.2d +; CHECK-NEXT: fcvtzu v16.2d, v16.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s +; CHECK-NEXT: uzp1 v2.4s, v4.4s, v5.4s +; CHECK-NEXT: uzp1 v3.4s, v6.4s, v7.4s +; CHECK-NEXT: uzp1 v5.4s, v20.4s, v21.4s +; CHECK-NEXT: uzp1 v4.4s, v22.4s, v23.4s +; CHECK-NEXT: uzp1 v6.4s, v18.4s, v19.4s +; CHECK-NEXT: uzp1 v7.4s, v16.4s, v17.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <32 x double> %a to <32 x i32> + ret <32 x i32> %c +} + +define <2 x i16> @fptos_v2f64_v2i16(<2 x double> %a) { +; CHECK-LABEL: fptos_v2f64_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x double> %a to <2 x i16> + ret <2 x i16> 
%c +} + +define <2 x i16> @fptou_v2f64_v2i16(<2 x double> %a) { +; CHECK-LABEL: fptou_v2f64_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x double> %a to <2 x i16> + ret <2 x i16> %c +} + +define <3 x i16> @fptos_v3f64_v3i16(<3 x double> %a) { +; CHECK-LABEL: fptos_v3f64_v3i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x double> %a to <3 x i16> + ret <3 x i16> %c +} + +define <3 x i16> @fptou_v3f64_v3i16(<3 x double> %a) { +; CHECK-LABEL: fptou_v3f64_v3i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x double> %a to <3 x i16> + ret <3 x i16> %c +} + +define <4 x i16> @fptos_v4f64_v4i16(<4 x double> %a) { +; CHECK-LABEL: fptos_v4f64_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %c = fptosi <4 x double> %a to <4 x i16> + ret <4 x i16> %c +} + +define <4 x i16> @fptou_v4f64_v4i16(<4 x double> %a) { +; CHECK-LABEL: fptou_v4f64_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; 
CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %c = fptoui <4 x double> %a to <4 x i16> + ret <4 x i16> %c +} + +define <8 x i16> @fptos_v8f64_v8i16(<8 x double> %a) { +; CHECK-LABEL: fptos_v8f64_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: adrp x8, .LCPI54_0 +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v6.2s, v3.2d +; CHECK-NEXT: xtn v5.2s, v2.2d +; CHECK-NEXT: xtn v4.2s, v1.2d +; CHECK-NEXT: xtn v3.2s, v0.2d +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI54_0] +; CHECK-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b +; CHECK-NEXT: ret +entry: + %c = fptosi <8 x double> %a to <8 x i16> + ret <8 x i16> %c +} + +define <8 x i16> @fptou_v8f64_v8i16(<8 x double> %a) { +; CHECK-LABEL: fptou_v8f64_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: adrp x8, .LCPI55_0 +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v6.2s, v3.2d +; CHECK-NEXT: xtn v5.2s, v2.2d +; CHECK-NEXT: xtn v4.2s, v1.2d +; CHECK-NEXT: xtn v3.2s, v0.2d +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI55_0] +; CHECK-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b +; CHECK-NEXT: ret +entry: + %c = fptoui <8 x double> %a to <8 x i16> + ret <8 x i16> %c +} + +define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) { +; CHECK-LABEL: fptos_v16f64_v16i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: adrp x8, .LCPI56_0 +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: xtn v19.2s, v3.2d +; CHECK-NEXT: xtn v23.2s, 
v7.2d +; CHECK-NEXT: xtn v18.2s, v2.2d +; CHECK-NEXT: xtn v22.2s, v6.2d +; CHECK-NEXT: xtn v17.2s, v1.2d +; CHECK-NEXT: xtn v21.2s, v5.2d +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI56_0] +; CHECK-NEXT: xtn v16.2s, v0.2d +; CHECK-NEXT: xtn v20.2s, v4.2d +; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b +; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-NEXT: ret +entry: + %c = fptosi <16 x double> %a to <16 x i16> + ret <16 x i16> %c +} + +define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) { +; CHECK-LABEL: fptou_v16f64_v16i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: adrp x8, .LCPI57_0 +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: xtn v19.2s, v3.2d +; CHECK-NEXT: xtn v23.2s, v7.2d +; CHECK-NEXT: xtn v18.2s, v2.2d +; CHECK-NEXT: xtn v22.2s, v6.2d +; CHECK-NEXT: xtn v17.2s, v1.2d +; CHECK-NEXT: xtn v21.2s, v5.2d +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI57_0] +; CHECK-NEXT: xtn v16.2s, v0.2d +; CHECK-NEXT: xtn v20.2s, v4.2d +; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b +; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-NEXT: ret +entry: + %c = fptoui <16 x double> %a to <16 x i16> + ret <16 x i16> %c +} + +define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) { +; CHECK-LABEL: fptos_v32f64_v32i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset b8, -8 +; CHECK-NEXT: .cfi_offset b9, -16 +; CHECK-NEXT: .cfi_offset b10, -24 +; CHECK-NEXT: .cfi_offset b11, -32 +; CHECK-NEXT: .cfi_offset b12, -40 +; CHECK-NEXT: .cfi_offset b13, -48 +; CHECK-NEXT: .cfi_offset b14, -56 +; CHECK-NEXT: .cfi_offset b15, -64 +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v18.2d, v2.2d +; CHECK-NEXT: adrp x8, .LCPI58_0 +; CHECK-NEXT: fcvtzs v19.2d, v1.2d +; CHECK-NEXT: ldp q20, q21, [sp, #160] +; CHECK-NEXT: fcvtzs v22.2d, v0.2d +; CHECK-NEXT: ldp q23, q24, [sp, #96] +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: ldp q16, q17, [sp, #128] +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: fcvtzs v21.2d, v21.2d +; CHECK-NEXT: fcvtzs v20.2d, v20.2d +; CHECK-NEXT: xtn v2.2s, v18.2d +; CHECK-NEXT: ldp q18, q25, [sp, #64] +; CHECK-NEXT: xtn v1.2s, v19.2d +; CHECK-NEXT: fcvtzs v19.2d, v24.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-NEXT: xtn v0.2s, v22.2d +; CHECK-NEXT: fcvtzs v22.2d, v23.2d +; CHECK-NEXT: xtn v29.2s, v7.2d +; CHECK-NEXT: fcvtzs v7.2d, v25.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: xtn v15.2s, v21.2d +; CHECK-NEXT: xtn v11.2s, v19.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: xtn v14.2s, v20.2d +; CHECK-NEXT: xtn v10.2s, v22.2d +; CHECK-NEXT: xtn v13.2s, v17.2d +; CHECK-NEXT: xtn v9.2s, v7.2d +; CHECK-NEXT: xtn v28.2s, v6.2d +; CHECK-NEXT: xtn v8.2s, v18.2d +; CHECK-NEXT: xtn v12.2s, v16.2d +; CHECK-NEXT: xtn v27.2s, v5.2d +; CHECK-NEXT: xtn v26.2s, v4.2d +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI58_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b +; CHECK-NEXT: tbl v2.16b, { v8.16b, v9.16b, 
v10.16b, v11.16b }, v4.16b +; CHECK-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptosi <32 x double> %a to <32 x i16> + ret <32 x i16> %c +} + +define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) { +; CHECK-LABEL: fptou_v32f64_v32i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset b8, -8 +; CHECK-NEXT: .cfi_offset b9, -16 +; CHECK-NEXT: .cfi_offset b10, -24 +; CHECK-NEXT: .cfi_offset b11, -32 +; CHECK-NEXT: .cfi_offset b12, -40 +; CHECK-NEXT: .cfi_offset b13, -48 +; CHECK-NEXT: .cfi_offset b14, -56 +; CHECK-NEXT: .cfi_offset b15, -64 +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v18.2d, v2.2d +; CHECK-NEXT: adrp x8, .LCPI59_0 +; CHECK-NEXT: fcvtzs v19.2d, v1.2d +; CHECK-NEXT: ldp q20, q21, [sp, #160] +; CHECK-NEXT: fcvtzs v22.2d, v0.2d +; CHECK-NEXT: ldp q23, q24, [sp, #96] +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: ldp q16, q17, [sp, #128] +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: fcvtzs v21.2d, v21.2d +; CHECK-NEXT: fcvtzs v20.2d, v20.2d +; CHECK-NEXT: xtn v2.2s, v18.2d +; CHECK-NEXT: ldp q18, q25, [sp, #64] +; CHECK-NEXT: xtn v1.2s, v19.2d +; CHECK-NEXT: fcvtzs v19.2d, v24.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-NEXT: xtn v0.2s, v22.2d +; CHECK-NEXT: fcvtzs v22.2d, v23.2d +; CHECK-NEXT: xtn v29.2s, v7.2d +; CHECK-NEXT: fcvtzs v7.2d, v25.2d +; CHECK-NEXT: 
fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: xtn v15.2s, v21.2d +; CHECK-NEXT: xtn v11.2s, v19.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: xtn v14.2s, v20.2d +; CHECK-NEXT: xtn v10.2s, v22.2d +; CHECK-NEXT: xtn v13.2s, v17.2d +; CHECK-NEXT: xtn v9.2s, v7.2d +; CHECK-NEXT: xtn v28.2s, v6.2d +; CHECK-NEXT: xtn v8.2s, v18.2d +; CHECK-NEXT: xtn v12.2s, v16.2d +; CHECK-NEXT: xtn v27.2s, v5.2d +; CHECK-NEXT: xtn v26.2s, v4.2d +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI59_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b +; CHECK-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b +; CHECK-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptoui <32 x double> %a to <32 x i16> + ret <32 x i16> %c +} + +define <2 x i8> @fptos_v2f64_v2i8(<2 x double> %a) { +; CHECK-LABEL: fptos_v2f64_v2i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x double> %a to <2 x i8> + ret <2 x i8> %c +} + +define <2 x i8> @fptou_v2f64_v2i8(<2 x double> %a) { +; CHECK-LABEL: fptou_v2f64_v2i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x double> %a to <2 x i8> + ret <2 x i8> %c +} + +define <3 x i8> @fptos_v3f64_v3i8(<3 x double> %a) { +; CHECK-LABEL: fptos_v3f64_v3i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; 
CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: umov w1, v0.h[1] +; CHECK-NEXT: umov w2, v0.h[2] +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x double> %a to <3 x i8> + ret <3 x i8> %c +} + +define <3 x i8> @fptou_v3f64_v3i8(<3 x double> %a) { +; CHECK-LABEL: fptou_v3f64_v3i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: umov w1, v0.h[1] +; CHECK-NEXT: umov w2, v0.h[2] +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x double> %a to <3 x i8> + ret <3 x i8> %c +} + +define <4 x i8> @fptos_v4f64_v4i8(<4 x double> %a) { +; CHECK-LABEL: fptos_v4f64_v4i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %c = fptosi <4 x double> %a to <4 x i8> + ret <4 x i8> %c +} + +define <4 x i8> @fptou_v4f64_v4i8(<4 x double> %a) { +; CHECK-LABEL: fptou_v4f64_v4i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %c = fptoui <4 x double> %a to <4 x i8> + ret <4 x i8> %c +} + +define <8 x i8> @fptos_v8f64_v8i8(<8 x double> %a) { +; CHECK-LABEL: fptos_v8f64_v8i8: +; CHECK: // %bb.0: // 
%entry +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: uzp1 v0.8b, v0.8b, v2.8b +; CHECK-NEXT: ret +entry: + %c = fptosi <8 x double> %a to <8 x i8> + ret <8 x i8> %c +} + +define <8 x i8> @fptou_v8f64_v8i8(<8 x double> %a) { +; CHECK-LABEL: fptou_v8f64_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: uzp1 v0.8b, v0.8b, v2.8b +; CHECK-NEXT: ret +entry: + %c = fptoui <8 x double> %a to <8 x i8> + ret <8 x i8> %c +} + +define <16 x i8> @fptos_v16f64_v16i8(<16 x double> %a) { +; CHECK-LABEL: fptos_v16f64_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v7.2s, v7.2d +; CHECK-NEXT: xtn v6.2s, v6.2d +; CHECK-NEXT: xtn v5.2s, v5.2d +; CHECK-NEXT: xtn v4.2s, v4.2d +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v6.4h, v6.4h, v7.4h +; CHECK-NEXT: uzp1 v4.4h, v4.4h, v5.4h +; CHECK-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: mov v4.d[1], v6.d[0] +; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: uzp1 
v0.16b, v0.16b, v4.16b +; CHECK-NEXT: ret +entry: + %c = fptosi <16 x double> %a to <16 x i8> + ret <16 x i8> %c +} + +define <16 x i8> @fptou_v16f64_v16i8(<16 x double> %a) { +; CHECK-LABEL: fptou_v16f64_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v7.2s, v7.2d +; CHECK-NEXT: xtn v6.2s, v6.2d +; CHECK-NEXT: xtn v5.2s, v5.2d +; CHECK-NEXT: xtn v4.2s, v4.2d +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v6.4h, v6.4h, v7.4h +; CHECK-NEXT: uzp1 v4.4h, v4.4h, v5.4h +; CHECK-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: mov v4.d[1], v6.d[0] +; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v4.16b +; CHECK-NEXT: ret +entry: + %c = fptoui <16 x double> %a to <16 x i8> + ret <16 x i8> %c +} + +define <32 x i8> @fptos_v32f64_v32i8(<32 x double> %a) { +; CHECK-LABEL: fptos_v32f64_v32i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp q16, q17, [sp] +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: ldp q18, q19, [sp, #32] +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: ldp q20, q21, [sp, #64] +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: ldp q22, q23, [sp, #96] +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v21.2d, v21.2d +; CHECK-NEXT: fcvtzs v20.2d, v20.2d +; CHECK-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-NEXT: fcvtzs v16.2d, v16.2d 
+; CHECK-NEXT: xtn v7.2s, v7.2d +; CHECK-NEXT: xtn v6.2s, v6.2d +; CHECK-NEXT: xtn v5.2s, v5.2d +; CHECK-NEXT: xtn v4.2s, v4.2d +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: xtn v23.2s, v23.2d +; CHECK-NEXT: xtn v22.2s, v22.2d +; CHECK-NEXT: xtn v21.2s, v21.2d +; CHECK-NEXT: xtn v20.2s, v20.2d +; CHECK-NEXT: xtn v19.2s, v19.2d +; CHECK-NEXT: xtn v18.2s, v18.2d +; CHECK-NEXT: xtn v17.2s, v17.2d +; CHECK-NEXT: xtn v16.2s, v16.2d +; CHECK-NEXT: uzp1 v6.4h, v6.4h, v7.4h +; CHECK-NEXT: uzp1 v4.4h, v4.4h, v5.4h +; CHECK-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: uzp1 v1.4h, v22.4h, v23.4h +; CHECK-NEXT: uzp1 v3.4h, v20.4h, v21.4h +; CHECK-NEXT: uzp1 v5.4h, v18.4h, v19.4h +; CHECK-NEXT: uzp1 v7.4h, v16.4h, v17.4h +; CHECK-NEXT: mov v4.d[1], v6.d[0] +; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: mov v3.d[1], v1.d[0] +; CHECK-NEXT: mov v7.d[1], v5.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v4.16b +; CHECK-NEXT: uzp1 v1.16b, v7.16b, v3.16b +; CHECK-NEXT: ret +entry: + %c = fptosi <32 x double> %a to <32 x i8> + ret <32 x i8> %c +} + +define <32 x i8> @fptou_v32f64_v32i8(<32 x double> %a) { +; CHECK-LABEL: fptou_v32f64_v32i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp q16, q17, [sp] +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: ldp q18, q19, [sp, #32] +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: ldp q20, q21, [sp, #64] +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: ldp q22, q23, [sp, #96] +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v21.2d, v21.2d +; CHECK-NEXT: fcvtzs v20.2d, v20.2d +; CHECK-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; 
CHECK-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-NEXT: xtn v7.2s, v7.2d +; CHECK-NEXT: xtn v6.2s, v6.2d +; CHECK-NEXT: xtn v5.2s, v5.2d +; CHECK-NEXT: xtn v4.2s, v4.2d +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: xtn v23.2s, v23.2d +; CHECK-NEXT: xtn v22.2s, v22.2d +; CHECK-NEXT: xtn v21.2s, v21.2d +; CHECK-NEXT: xtn v20.2s, v20.2d +; CHECK-NEXT: xtn v19.2s, v19.2d +; CHECK-NEXT: xtn v18.2s, v18.2d +; CHECK-NEXT: xtn v17.2s, v17.2d +; CHECK-NEXT: xtn v16.2s, v16.2d +; CHECK-NEXT: uzp1 v6.4h, v6.4h, v7.4h +; CHECK-NEXT: uzp1 v4.4h, v4.4h, v5.4h +; CHECK-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: uzp1 v1.4h, v22.4h, v23.4h +; CHECK-NEXT: uzp1 v3.4h, v20.4h, v21.4h +; CHECK-NEXT: uzp1 v5.4h, v18.4h, v19.4h +; CHECK-NEXT: uzp1 v7.4h, v16.4h, v17.4h +; CHECK-NEXT: mov v4.d[1], v6.d[0] +; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: mov v3.d[1], v1.d[0] +; CHECK-NEXT: mov v7.d[1], v5.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v4.16b +; CHECK-NEXT: uzp1 v1.16b, v7.16b, v3.16b +; CHECK-NEXT: ret +entry: + %c = fptoui <32 x double> %a to <32 x i8> + ret <32 x i8> %c +} + +define <2 x i64> @fptos_v2f32_v2i64(<2 x float> %a) { +; CHECK-LABEL: fptos_v2f32_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x float> %a to <2 x i64> + ret <2 x i64> %c +} + +define <2 x i64> @fptou_v2f32_v2i64(<2 x float> %a) { +; CHECK-LABEL: fptou_v2f32_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x float> %a to <2 x i64> + ret <2 x i64> %c +} + +define <3 x i64> @fptos_v3f32_v3i64(<3 x float> %a) { +; CHECK-LABEL: fptos_v3f32_v3i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v1.2d, v0.2s +; CHECK-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-NEXT: fcvtzs v3.2d, 
v1.2d +; CHECK-NEXT: fcvtzs v2.2d, v0.2d +; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-NEXT: fmov d0, d3 +; CHECK-NEXT: ext v1.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x float> %a to <3 x i64> + ret <3 x i64> %c +} + +define <3 x i64> @fptou_v3f32_v3i64(<3 x float> %a) { +; CHECK-LABEL: fptou_v3f32_v3i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v1.2d, v0.2s +; CHECK-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-NEXT: fcvtzu v3.2d, v1.2d +; CHECK-NEXT: fcvtzu v2.2d, v0.2d +; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-NEXT: fmov d0, d3 +; CHECK-NEXT: ext v1.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x float> %a to <3 x i64> + ret <3 x i64> %c +} + +define <4 x i64> @fptos_v4f32_v4i64(<4 x float> %a) { +; CHECK-LABEL: fptos_v4f32_v4i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl2 v1.2d, v0.4s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <4 x float> %a to <4 x i64> + ret <4 x i64> %c +} + +define <4 x i64> @fptou_v4f32_v4i64(<4 x float> %a) { +; CHECK-LABEL: fptou_v4f32_v4i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl2 v1.2d, v0.4s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <4 x float> %a to <4 x i64> + ret <4 x i64> %c +} + +define <8 x i64> @fptos_v8f32_v8i64(<8 x float> %a) { +; CHECK-LABEL: fptos_v8f32_v8i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v2.2d, v0.2s +; CHECK-NEXT: fcvtl2 v3.2d, v0.4s +; CHECK-NEXT: fcvtl2 v4.2d, v1.4s +; CHECK-NEXT: fcvtl v5.2d, v1.2s +; CHECK-NEXT: fcvtzs v0.2d, v2.2d +; CHECK-NEXT: fcvtzs v1.2d, v3.2d +; CHECK-NEXT: fcvtzs v3.2d, v4.2d +; CHECK-NEXT: fcvtzs v2.2d, v5.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <8 x 
float> %a to <8 x i64> + ret <8 x i64> %c +} + +define <8 x i64> @fptou_v8f32_v8i64(<8 x float> %a) { +; CHECK-LABEL: fptou_v8f32_v8i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v2.2d, v0.2s +; CHECK-NEXT: fcvtl2 v3.2d, v0.4s +; CHECK-NEXT: fcvtl2 v4.2d, v1.4s +; CHECK-NEXT: fcvtl v5.2d, v1.2s +; CHECK-NEXT: fcvtzu v0.2d, v2.2d +; CHECK-NEXT: fcvtzu v1.2d, v3.2d +; CHECK-NEXT: fcvtzu v3.2d, v4.2d +; CHECK-NEXT: fcvtzu v2.2d, v5.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <8 x float> %a to <8 x i64> + ret <8 x i64> %c +} + +define <16 x i64> @fptos_v16f32_v16i64(<16 x float> %a) { +; CHECK-LABEL: fptos_v16f32_v16i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtl2 v5.2d, v1.4s +; CHECK-NEXT: fcvtl v6.2d, v1.2s +; CHECK-NEXT: fcvtl v7.2d, v2.2s +; CHECK-NEXT: fcvtl2 v16.2d, v2.4s +; CHECK-NEXT: fcvtl2 v17.2d, v3.4s +; CHECK-NEXT: fcvtl v18.2d, v3.2s +; CHECK-NEXT: fcvtzs v1.2d, v4.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v3.2d, v5.2d +; CHECK-NEXT: fcvtzs v2.2d, v6.2d +; CHECK-NEXT: fcvtzs v4.2d, v7.2d +; CHECK-NEXT: fcvtzs v5.2d, v16.2d +; CHECK-NEXT: fcvtzs v7.2d, v17.2d +; CHECK-NEXT: fcvtzs v6.2d, v18.2d +; CHECK-NEXT: ret +entry: + %c = fptosi <16 x float> %a to <16 x i64> + ret <16 x i64> %c +} + +define <16 x i64> @fptou_v16f32_v16i64(<16 x float> %a) { +; CHECK-LABEL: fptou_v16f32_v16i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtl2 v5.2d, v1.4s +; CHECK-NEXT: fcvtl v6.2d, v1.2s +; CHECK-NEXT: fcvtl v7.2d, v2.2s +; CHECK-NEXT: fcvtl2 v16.2d, v2.4s +; CHECK-NEXT: fcvtl2 v17.2d, v3.4s +; CHECK-NEXT: fcvtl v18.2d, v3.2s +; CHECK-NEXT: fcvtzu v1.2d, v4.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v3.2d, v5.2d +; CHECK-NEXT: fcvtzu v2.2d, v6.2d +; CHECK-NEXT: fcvtzu v4.2d, v7.2d +; CHECK-NEXT: fcvtzu v5.2d, v16.2d +; CHECK-NEXT: fcvtzu v7.2d, v17.2d +; CHECK-NEXT: fcvtzu 
v6.2d, v18.2d +; CHECK-NEXT: ret +entry: + %c = fptoui <16 x float> %a to <16 x i64> + ret <16 x i64> %c +} + +define <32 x i64> @fptos_v32f32_v32i64(<32 x float> %a) { +; CHECK-LABEL: fptos_v32f32_v32i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl2 v16.2d, v7.4s +; CHECK-NEXT: fcvtl v7.2d, v7.2s +; CHECK-NEXT: fcvtl2 v17.2d, v6.4s +; CHECK-NEXT: fcvtl v6.2d, v6.2s +; CHECK-NEXT: fcvtl2 v18.2d, v5.4s +; CHECK-NEXT: fcvtl v5.2d, v5.2s +; CHECK-NEXT: fcvtl2 v19.2d, v4.4s +; CHECK-NEXT: fcvtl v4.2d, v4.2s +; CHECK-NEXT: fcvtl2 v20.2d, v3.4s +; CHECK-NEXT: fcvtl v3.2d, v3.2s +; CHECK-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: stp q7, q16, [x8, #224] +; CHECK-NEXT: fcvtl2 v7.2d, v2.4s +; CHECK-NEXT: fcvtzs v16.2d, v19.2d +; CHECK-NEXT: stp q5, q18, [x8, #160] +; CHECK-NEXT: fcvtl v2.2d, v2.2s +; CHECK-NEXT: fcvtl2 v5.2d, v0.4s +; CHECK-NEXT: stp q6, q17, [x8, #192] +; CHECK-NEXT: fcvtl2 v6.2d, v1.4s +; CHECK-NEXT: fcvtzs v17.2d, v20.2d +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: stp q4, q16, [x8, #128] +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v4.2d, v6.2d +; CHECK-NEXT: stp q3, q17, [x8, #96] +; CHECK-NEXT: fcvtzs v3.2d, v5.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: stp q2, q7, [x8, #64] +; CHECK-NEXT: stp q0, q3, [x8] +; CHECK-NEXT: stp q1, q4, [x8, #32] +; CHECK-NEXT: ret +entry: + %c = fptosi <32 x float> %a to <32 x i64> + ret <32 x i64> %c +} + +define <32 x i64> @fptou_v32f32_v32i64(<32 x float> %a) { +; CHECK-LABEL: fptou_v32f32_v32i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl2 v16.2d, v7.4s +; CHECK-NEXT: fcvtl v7.2d, v7.2s +; CHECK-NEXT: fcvtl2 v17.2d, v6.4s +; 
CHECK-NEXT: fcvtl v6.2d, v6.2s +; CHECK-NEXT: fcvtl2 v18.2d, v5.4s +; CHECK-NEXT: fcvtl v5.2d, v5.2s +; CHECK-NEXT: fcvtl2 v19.2d, v4.4s +; CHECK-NEXT: fcvtl v4.2d, v4.2s +; CHECK-NEXT: fcvtl2 v20.2d, v3.4s +; CHECK-NEXT: fcvtl v3.2d, v3.2s +; CHECK-NEXT: fcvtzu v16.2d, v16.2d +; CHECK-NEXT: fcvtzu v7.2d, v7.2d +; CHECK-NEXT: fcvtzu v17.2d, v17.2d +; CHECK-NEXT: fcvtzu v6.2d, v6.2d +; CHECK-NEXT: fcvtzu v18.2d, v18.2d +; CHECK-NEXT: fcvtzu v5.2d, v5.2d +; CHECK-NEXT: fcvtzu v4.2d, v4.2d +; CHECK-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-NEXT: stp q7, q16, [x8, #224] +; CHECK-NEXT: fcvtl2 v7.2d, v2.4s +; CHECK-NEXT: fcvtzu v16.2d, v19.2d +; CHECK-NEXT: stp q5, q18, [x8, #160] +; CHECK-NEXT: fcvtl v2.2d, v2.2s +; CHECK-NEXT: fcvtl2 v5.2d, v0.4s +; CHECK-NEXT: stp q6, q17, [x8, #192] +; CHECK-NEXT: fcvtl2 v6.2d, v1.4s +; CHECK-NEXT: fcvtzu v17.2d, v20.2d +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: stp q4, q16, [x8, #128] +; CHECK-NEXT: fcvtzu v7.2d, v7.2d +; CHECK-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-NEXT: fcvtzu v4.2d, v6.2d +; CHECK-NEXT: stp q3, q17, [x8, #96] +; CHECK-NEXT: fcvtzu v3.2d, v5.2d +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: stp q2, q7, [x8, #64] +; CHECK-NEXT: stp q0, q3, [x8] +; CHECK-NEXT: stp q1, q4, [x8, #32] +; CHECK-NEXT: ret +entry: + %c = fptoui <32 x float> %a to <32 x i64> + ret <32 x i64> %c +} + +define <2 x i32> @fptos_v2f32_v2i32(<2 x float> %a) { +; CHECK-LABEL: fptos_v2f32_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x float> %a to <2 x i32> + ret <2 x i32> %c +} + +define <2 x i32> @fptou_v2f32_v2i32(<2 x float> %a) { +; CHECK-LABEL: fptou_v2f32_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x float> %a to <2 x i32> + ret <2 x i32> %c +} + +define <3 x i32> @fptos_v3f32_v3i32(<3 x float> %a) { +; CHECK-LABEL: fptos_v3f32_v3i32: 
+; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x float> %a to <3 x i32> + ret <3 x i32> %c +} + +define <3 x i32> @fptou_v3f32_v3i32(<3 x float> %a) { +; CHECK-LABEL: fptou_v3f32_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x float> %a to <3 x i32> + ret <3 x i32> %c +} + +define <4 x i32> @fptos_v4f32_v4i32(<4 x float> %a) { +; CHECK-LABEL: fptos_v4f32_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <4 x float> %a to <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @fptou_v4f32_v4i32(<4 x float> %a) { +; CHECK-LABEL: fptou_v4f32_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <4 x float> %a to <4 x i32> + ret <4 x i32> %c +} + +define <8 x i32> @fptos_v8f32_v8i32(<8 x float> %a) { +; CHECK-LABEL: fptos_v8f32_v8i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <8 x float> %a to <8 x i32> + ret <8 x i32> %c +} + +define <8 x i32> @fptou_v8f32_v8i32(<8 x float> %a) { +; CHECK-LABEL: fptou_v8f32_v8i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <8 x float> %a to <8 x i32> + ret <8 x i32> %c +} + +define <16 x i32> @fptos_v16f32_v16i32(<16 x float> %a) { +; CHECK-LABEL: fptos_v16f32_v16i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <16 x float> %a to <16 x i32> + ret <16 x i32> %c +} + +define <16 x i32> @fptou_v16f32_v16i32(<16 x float> %a) { +; CHECK-LABEL: fptou_v16f32_v16i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; 
CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <16 x float> %a to <16 x i32> + ret <16 x i32> %c +} + +define <32 x i32> @fptos_v32f32_v32i32(<32 x float> %a) { +; CHECK-LABEL: fptos_v32f32_v32i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-NEXT: fcvtzs v5.4s, v5.4s +; CHECK-NEXT: fcvtzs v6.4s, v6.4s +; CHECK-NEXT: fcvtzs v7.4s, v7.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <32 x float> %a to <32 x i32> + ret <32 x i32> %c +} + +define <32 x i32> @fptou_v32f32_v32i32(<32 x float> %a) { +; CHECK-LABEL: fptou_v32f32_v32i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-NEXT: fcvtzu v4.4s, v4.4s +; CHECK-NEXT: fcvtzu v5.4s, v5.4s +; CHECK-NEXT: fcvtzu v6.4s, v6.4s +; CHECK-NEXT: fcvtzu v7.4s, v7.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <32 x float> %a to <32 x i32> + ret <32 x i32> %c +} + +define <2 x i16> @fptos_v2f32_v2i16(<2 x float> %a) { +; CHECK-LABEL: fptos_v2f32_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x float> %a to <2 x i16> + ret <2 x i16> %c +} + +define <2 x i16> @fptou_v2f32_v2i16(<2 x float> %a) { +; CHECK-LABEL: fptou_v2f32_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x float> %a to <2 x i16> + ret <2 x i16> %c +} + +define <3 x i16> @fptos_v3f32_v3i16(<3 x float> %a) { +; CHECK-LABEL: fptos_v3f32_v3i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x float> %a to <3 x i16> + ret <3 x i16> %c +} + +define <3 x i16> 
@fptou_v3f32_v3i16(<3 x float> %a) { +; CHECK-LABEL: fptou_v3f32_v3i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x float> %a to <3 x i16> + ret <3 x i16> %c +} + +define <4 x i16> @fptos_v4f32_v4i16(<4 x float> %a) { +; CHECK-LABEL: fptos_v4f32_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <4 x float> %a to <4 x i16> + ret <4 x i16> %c +} + +define <4 x i16> @fptou_v4f32_v4i16(<4 x float> %a) { +; CHECK-LABEL: fptou_v4f32_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <4 x float> %a to <4 x i16> + ret <4 x i16> %c +} + +define <8 x i16> @fptos_v8f32_v8i16(<8 x float> %a) { +; CHECK-LABEL: fptos_v8f32_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %c = fptosi <8 x float> %a to <8 x i16> + ret <8 x i16> %c +} + +define <8 x i16> @fptou_v8f32_v8i16(<8 x float> %a) { +; CHECK-LABEL: fptou_v8f32_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %c = fptoui <8 x float> %a to <8 x i16> + ret <8 x i16> %c +} + +define <16 x i16> @fptos_v16f32_v16i16(<16 x float> %a) { +; CHECK-LABEL: fptos_v16f32_v16i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v1.8h, v2.8h, v3.8h +; CHECK-NEXT: ret +entry: + %c = fptosi <16 x float> %a to <16 x i16> + ret <16 x i16> %c +} + +define <16 x i16> @fptou_v16f32_v16i16(<16 x float> %a) { +; CHECK-LABEL: 
fptou_v16f32_v16i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v1.8h, v2.8h, v3.8h +; CHECK-NEXT: ret +entry: + %c = fptoui <16 x float> %a to <16 x i16> + ret <16 x i16> %c +} + +define <32 x i16> @fptos_v32f32_v32i16(<32 x float> %a) { +; CHECK-LABEL: fptos_v32f32_v32i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v5.4s, v5.4s +; CHECK-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-NEXT: fcvtzs v7.4s, v7.4s +; CHECK-NEXT: fcvtzs v6.4s, v6.4s +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v1.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v2.8h, v4.8h, v5.8h +; CHECK-NEXT: uzp1 v3.8h, v6.8h, v7.8h +; CHECK-NEXT: ret +entry: + %c = fptosi <32 x float> %a to <32 x i16> + ret <32 x i16> %c +} + +define <32 x i16> @fptou_v32f32_v32i16(<32 x float> %a) { +; CHECK-LABEL: fptou_v32f32_v32i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-NEXT: fcvtzu v5.4s, v5.4s +; CHECK-NEXT: fcvtzu v4.4s, v4.4s +; CHECK-NEXT: fcvtzu v7.4s, v7.4s +; CHECK-NEXT: fcvtzu v6.4s, v6.4s +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v1.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v2.8h, v4.8h, v5.8h +; CHECK-NEXT: uzp1 v3.8h, v6.8h, v7.8h +; CHECK-NEXT: ret +entry: + %c = fptoui <32 x float> %a to <32 x i16> + ret <32 x i16> %c +} + +define <2 x i8> @fptos_v2f32_v2i8(<2 x float> %a) { +; CHECK-LABEL: fptos_v2f32_v2i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x float> %a to <2 x i8> + ret <2 x i8> %c +} + +define <2 x i8> @fptou_v2f32_v2i8(<2 x float> %a) { +; 
CHECK-LABEL: fptou_v2f32_v2i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x float> %a to <2 x i8> + ret <2 x i8> %c +} + +define <3 x i8> @fptos_v3f32_v3i8(<3 x float> %a) { +; CHECK-LABEL: fptos_v3f32_v3i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: umov w1, v0.h[1] +; CHECK-NEXT: umov w2, v0.h[2] +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x float> %a to <3 x i8> + ret <3 x i8> %c +} + +define <3 x i8> @fptou_v3f32_v3i8(<3 x float> %a) { +; CHECK-LABEL: fptou_v3f32_v3i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: umov w1, v0.h[1] +; CHECK-NEXT: umov w2, v0.h[2] +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x float> %a to <3 x i8> + ret <3 x i8> %c +} + +define <4 x i8> @fptos_v4f32_v4i8(<4 x float> %a) { +; CHECK-LABEL: fptos_v4f32_v4i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <4 x float> %a to <4 x i8> + ret <4 x i8> %c +} + +define <4 x i8> @fptou_v4f32_v4i8(<4 x float> %a) { +; CHECK-LABEL: fptou_v4f32_v4i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <4 x float> %a to <4 x i8> + ret <4 x i8> %c +} + +define <8 x i8> @fptos_v8f32_v8i8(<8 x float> %a) { +; CHECK-LABEL: fptos_v8f32_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret +entry: + %c = fptosi <8 x float> %a to <8 x i8> + ret <8 x i8> %c +} + +define <8 x i8> @fptou_v8f32_v8i8(<8 x float> %a) { +; CHECK-LABEL: fptou_v8f32_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs 
v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret +entry: + %c = fptoui <8 x float> %a to <8 x i8> + ret <8 x i8> %c +} + +define <16 x i8> @fptos_v16f32_v16i8(<16 x float> %a) { +; CHECK-LABEL: fptos_v16f32_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: xtn v3.4h, v3.4s +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: mov v2.d[1], v3.d[0] +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret +entry: + %c = fptosi <16 x float> %a to <16 x i8> + ret <16 x i8> %c +} + +define <16 x i8> @fptou_v16f32_v16i8(<16 x float> %a) { +; CHECK-LABEL: fptou_v16f32_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: xtn v3.4h, v3.4s +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: mov v2.d[1], v3.d[0] +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret +entry: + %c = fptoui <16 x float> %a to <16 x i8> + ret <16 x i8> %c +} + +define <32 x i8> @fptos_v32f32_v32i8(<32 x float> %a) { +; CHECK-LABEL: fptos_v32f32_v32i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v7.4s, v7.4s +; CHECK-NEXT: fcvtzs v6.4s, v6.4s +; CHECK-NEXT: fcvtzs v5.4s, v5.4s +; CHECK-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-NEXT: xtn v3.4h, v3.4s +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v7.4h, 
v7.4s +; CHECK-NEXT: xtn v6.4h, v6.4s +; CHECK-NEXT: xtn v5.4h, v5.4s +; CHECK-NEXT: xtn v4.4h, v4.4s +; CHECK-NEXT: mov v2.d[1], v3.d[0] +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: mov v6.d[1], v7.d[0] +; CHECK-NEXT: mov v4.d[1], v5.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: uzp1 v1.16b, v4.16b, v6.16b +; CHECK-NEXT: ret +entry: + %c = fptosi <32 x float> %a to <32 x i8> + ret <32 x i8> %c +} + +define <32 x i8> @fptou_v32f32_v32i8(<32 x float> %a) { +; CHECK-LABEL: fptou_v32f32_v32i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v7.4s, v7.4s +; CHECK-NEXT: fcvtzs v6.4s, v6.4s +; CHECK-NEXT: fcvtzs v5.4s, v5.4s +; CHECK-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-NEXT: xtn v3.4h, v3.4s +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v7.4h, v7.4s +; CHECK-NEXT: xtn v6.4h, v6.4s +; CHECK-NEXT: xtn v5.4h, v5.4s +; CHECK-NEXT: xtn v4.4h, v4.4s +; CHECK-NEXT: mov v2.d[1], v3.d[0] +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: mov v6.d[1], v7.d[0] +; CHECK-NEXT: mov v4.d[1], v5.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: uzp1 v1.16b, v4.16b, v6.16b +; CHECK-NEXT: ret +entry: + %c = fptoui <32 x float> %a to <32 x i8> + ret <32 x i8> %c +} + +define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v2f16_v2i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvtzs x8, s0 +; CHECK-SD-NOFP16-NEXT: fcvtzs x9, s1 +; CHECK-SD-NOFP16-NEXT: fmov d0, x8 +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x9 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v2f16_v2i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; 
CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-FP16-NEXT: mov h1, v0.h[1] +; CHECK-SD-FP16-NEXT: fcvtzs x8, h0 +; CHECK-SD-FP16-NEXT: fcvtzs x9, h1 +; CHECK-SD-FP16-NEXT: fmov d0, x8 +; CHECK-SD-FP16-NEXT: mov v0.d[1], x9 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvtzs x8, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzs x9, s1 +; CHECK-GI-NOFP16-NEXT: fmov d0, x8 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x9 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: fcvtzs x8, h0 +; CHECK-GI-FP16-NEXT: fcvtzs x9, h1 +; CHECK-GI-FP16-NEXT: fmov d0, x8 +; CHECK-GI-FP16-NEXT: mov v0.d[1], x9 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <2 x half> %a to <2 x i64> + ret <2 x i64> %c +} + +define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v2f16_v2i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvtzu x8, s0 +; CHECK-SD-NOFP16-NEXT: fcvtzu x9, s1 +; CHECK-SD-NOFP16-NEXT: fmov d0, x8 +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x9 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v2f16_v2i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-FP16-NEXT: mov h1, v0.h[1] +; CHECK-SD-FP16-NEXT: fcvtzu x8, h0 +; CHECK-SD-FP16-NEXT: fcvtzu x9, h1 +; CHECK-SD-FP16-NEXT: fmov d0, x8 +; CHECK-SD-FP16-NEXT: mov v0.d[1], x9 +; 
CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvtzu x8, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzu x9, s1 +; CHECK-GI-NOFP16-NEXT: fmov d0, x8 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x9 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: fcvtzu x8, h0 +; CHECK-GI-FP16-NEXT: fcvtzu x9, h1 +; CHECK-GI-FP16-NEXT: fmov d0, x8 +; CHECK-GI-FP16-NEXT: mov v0.d[1], x9 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <2 x half> %a to <2 x i64> + ret <2 x i64> %c +} + +define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v3f16_v3i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvtzs x8, s0 +; CHECK-SD-NOFP16-NEXT: fcvtzs x9, s1 +; CHECK-SD-NOFP16-NEXT: fcvtzs x10, s2 +; CHECK-SD-NOFP16-NEXT: fmov d0, x8 +; CHECK-SD-NOFP16-NEXT: fmov d1, x9 +; CHECK-SD-NOFP16-NEXT: fmov d2, x10 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v3f16_v3i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-FP16-NEXT: mov h1, v0.h[1] +; CHECK-SD-FP16-NEXT: mov h2, v0.h[2] +; CHECK-SD-FP16-NEXT: fcvtzs x8, h0 +; CHECK-SD-FP16-NEXT: fcvtzs x9, h1 +; CHECK-SD-FP16-NEXT: fcvtzs x10, h2 +; CHECK-SD-FP16-NEXT: fmov d0, x8 +; CHECK-SD-FP16-NEXT: fmov d1, x9 +; CHECK-SD-FP16-NEXT: fmov d2, x10 +; 
CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v3f16_v3i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fcvtzs x8, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzs x9, s1 +; CHECK-GI-NOFP16-NEXT: fcvtzs x10, s2 +; CHECK-GI-NOFP16-NEXT: fmov d0, x8 +; CHECK-GI-NOFP16-NEXT: fmov d1, x9 +; CHECK-GI-NOFP16-NEXT: fmov d2, x10 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: fcvtzs x8, h0 +; CHECK-GI-FP16-NEXT: fcvtzs x9, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x10, h2 +; CHECK-GI-FP16-NEXT: fmov d0, x8 +; CHECK-GI-FP16-NEXT: fmov d1, x9 +; CHECK-GI-FP16-NEXT: fmov d2, x10 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <3 x half> %a to <3 x i64> + ret <3 x i64> %c +} + +define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v3f16_v3i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvtzu x8, s0 +; CHECK-SD-NOFP16-NEXT: fcvtzu x9, s1 +; CHECK-SD-NOFP16-NEXT: fcvtzu x10, s2 +; CHECK-SD-NOFP16-NEXT: fmov d0, x8 +; CHECK-SD-NOFP16-NEXT: fmov d1, x9 +; CHECK-SD-NOFP16-NEXT: fmov d2, x10 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v3f16_v3i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-FP16-NEXT: mov h1, v0.h[1] 
+; CHECK-SD-FP16-NEXT: mov h2, v0.h[2] +; CHECK-SD-FP16-NEXT: fcvtzu x8, h0 +; CHECK-SD-FP16-NEXT: fcvtzu x9, h1 +; CHECK-SD-FP16-NEXT: fcvtzu x10, h2 +; CHECK-SD-FP16-NEXT: fmov d0, x8 +; CHECK-SD-FP16-NEXT: fmov d1, x9 +; CHECK-SD-FP16-NEXT: fmov d2, x10 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v3f16_v3i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fcvtzu x8, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzu x9, s1 +; CHECK-GI-NOFP16-NEXT: fcvtzu x10, s2 +; CHECK-GI-NOFP16-NEXT: fmov d0, x8 +; CHECK-GI-NOFP16-NEXT: fmov d1, x9 +; CHECK-GI-NOFP16-NEXT: fmov d2, x10 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: fcvtzu x8, h0 +; CHECK-GI-FP16-NEXT: fcvtzu x9, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x10, h2 +; CHECK-GI-FP16-NEXT: fmov d0, x8 +; CHECK-GI-FP16-NEXT: fmov d1, x9 +; CHECK-GI-FP16-NEXT: fmov d2, x10 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <3 x half> %a to <3 x i64> + ret <3 x i64> %c +} + +define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v4f16_v4i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvtzs x8, s0 +; CHECK-SD-NOFP16-NEXT: fcvtzs x9, s1 +; CHECK-SD-NOFP16-NEXT: 
fcvtzs x10, s2 +; CHECK-SD-NOFP16-NEXT: fcvtzs x11, s3 +; CHECK-SD-NOFP16-NEXT: fmov d0, x8 +; CHECK-SD-NOFP16-NEXT: fmov d1, x9 +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x10 +; CHECK-SD-NOFP16-NEXT: mov v1.d[1], x11 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v4f16_v4i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-FP16-NEXT: mov h1, v0.h[2] +; CHECK-SD-FP16-NEXT: mov h2, v0.h[1] +; CHECK-SD-FP16-NEXT: mov h3, v0.h[3] +; CHECK-SD-FP16-NEXT: fcvtzs x8, h0 +; CHECK-SD-FP16-NEXT: fcvtzs x9, h1 +; CHECK-SD-FP16-NEXT: fcvtzs x10, h2 +; CHECK-SD-FP16-NEXT: fcvtzs x11, h3 +; CHECK-SD-FP16-NEXT: fmov d0, x8 +; CHECK-SD-FP16-NEXT: fmov d1, x9 +; CHECK-SD-FP16-NEXT: mov v0.d[1], x10 +; CHECK-SD-FP16-NEXT: mov v1.d[1], x11 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v4f16_v4i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 +; CHECK-GI-NOFP16-NEXT: fcvtzs x8, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzs x9, s1 +; CHECK-GI-NOFP16-NEXT: fcvtzs x10, s2 +; CHECK-GI-NOFP16-NEXT: fcvtzs x11, s3 +; CHECK-GI-NOFP16-NEXT: fmov d0, x8 +; CHECK-GI-NOFP16-NEXT: fmov d1, x9 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x10 +; CHECK-GI-NOFP16-NEXT: mov v1.d[1], x11 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v4f16_v4i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: fcvtzs x8, h0 +; CHECK-GI-FP16-NEXT: fcvtzs x9, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x10, h2 +; CHECK-GI-FP16-NEXT: fcvtzs 
x11, h3 +; CHECK-GI-FP16-NEXT: fmov d0, x8 +; CHECK-GI-FP16-NEXT: fmov d1, x9 +; CHECK-GI-FP16-NEXT: mov v0.d[1], x10 +; CHECK-GI-FP16-NEXT: mov v1.d[1], x11 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <4 x half> %a to <4 x i64> + ret <4 x i64> %c +} + +define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v4f16_v4i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvtzu x8, s0 +; CHECK-SD-NOFP16-NEXT: fcvtzu x9, s1 +; CHECK-SD-NOFP16-NEXT: fcvtzu x10, s2 +; CHECK-SD-NOFP16-NEXT: fcvtzu x11, s3 +; CHECK-SD-NOFP16-NEXT: fmov d0, x8 +; CHECK-SD-NOFP16-NEXT: fmov d1, x9 +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x10 +; CHECK-SD-NOFP16-NEXT: mov v1.d[1], x11 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v4f16_v4i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-FP16-NEXT: mov h1, v0.h[2] +; CHECK-SD-FP16-NEXT: mov h2, v0.h[1] +; CHECK-SD-FP16-NEXT: mov h3, v0.h[3] +; CHECK-SD-FP16-NEXT: fcvtzu x8, h0 +; CHECK-SD-FP16-NEXT: fcvtzu x9, h1 +; CHECK-SD-FP16-NEXT: fcvtzu x10, h2 +; CHECK-SD-FP16-NEXT: fcvtzu x11, h3 +; CHECK-SD-FP16-NEXT: fmov d0, x8 +; CHECK-SD-FP16-NEXT: fmov d1, x9 +; CHECK-SD-FP16-NEXT: mov v0.d[1], x10 +; CHECK-SD-FP16-NEXT: mov v1.d[1], x11 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v4f16_v4i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt 
s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 +; CHECK-GI-NOFP16-NEXT: fcvtzu x8, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzu x9, s1 +; CHECK-GI-NOFP16-NEXT: fcvtzu x10, s2 +; CHECK-GI-NOFP16-NEXT: fcvtzu x11, s3 +; CHECK-GI-NOFP16-NEXT: fmov d0, x8 +; CHECK-GI-NOFP16-NEXT: fmov d1, x9 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x10 +; CHECK-GI-NOFP16-NEXT: mov v1.d[1], x11 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v4f16_v4i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: fcvtzu x8, h0 +; CHECK-GI-FP16-NEXT: fcvtzu x9, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x10, h2 +; CHECK-GI-FP16-NEXT: fcvtzu x11, h3 +; CHECK-GI-FP16-NEXT: fmov d0, x8 +; CHECK-GI-FP16-NEXT: fmov d1, x9 +; CHECK-GI-FP16-NEXT: mov v0.d[1], x10 +; CHECK-GI-FP16-NEXT: mov v1.d[1], x11 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <4 x half> %a to <4 x i64> + ret <4 x i64> %c +} + +define <8 x i64> @fptos_v8f16_v8i64(<8 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v8f16_v8i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[1] +; CHECK-SD-NOFP16-NEXT: mov h7, v0.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[2] +; CHECK-SD-NOFP16-NEXT: mov h5, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fcvtzs x9, s0 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvtzs x8, s1 +; CHECK-SD-NOFP16-NEXT: fcvtzs x12, s4 +; CHECK-SD-NOFP16-NEXT: fcvtzs x11, s3 +; 
CHECK-SD-NOFP16-NEXT: fcvtzs x15, s7 +; CHECK-SD-NOFP16-NEXT: fmov d0, x9 +; CHECK-SD-NOFP16-NEXT: fcvtzs x10, s2 +; CHECK-SD-NOFP16-NEXT: fcvtzs x13, s5 +; CHECK-SD-NOFP16-NEXT: fcvtzs x14, s6 +; CHECK-SD-NOFP16-NEXT: fmov d2, x8 +; CHECK-SD-NOFP16-NEXT: fmov d1, x12 +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-SD-NOFP16-NEXT: fmov d3, x10 +; CHECK-SD-NOFP16-NEXT: mov v2.d[1], x13 +; CHECK-SD-NOFP16-NEXT: mov v1.d[1], x15 +; CHECK-SD-NOFP16-NEXT: mov v3.d[1], x14 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v8f16_v8i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-FP16-NEXT: mov h4, v0.h[2] +; CHECK-SD-FP16-NEXT: mov h3, v0.h[1] +; CHECK-SD-FP16-NEXT: mov h7, v0.h[3] +; CHECK-SD-FP16-NEXT: fcvtzs x9, h0 +; CHECK-SD-FP16-NEXT: mov h2, v1.h[2] +; CHECK-SD-FP16-NEXT: mov h5, v1.h[1] +; CHECK-SD-FP16-NEXT: mov h6, v1.h[3] +; CHECK-SD-FP16-NEXT: fcvtzs x8, h1 +; CHECK-SD-FP16-NEXT: fcvtzs x12, h4 +; CHECK-SD-FP16-NEXT: fcvtzs x11, h3 +; CHECK-SD-FP16-NEXT: fcvtzs x15, h7 +; CHECK-SD-FP16-NEXT: fmov d0, x9 +; CHECK-SD-FP16-NEXT: fcvtzs x10, h2 +; CHECK-SD-FP16-NEXT: fcvtzs x13, h5 +; CHECK-SD-FP16-NEXT: fcvtzs x14, h6 +; CHECK-SD-FP16-NEXT: fmov d2, x8 +; CHECK-SD-FP16-NEXT: fmov d1, x12 +; CHECK-SD-FP16-NEXT: mov v0.d[1], x11 +; CHECK-SD-FP16-NEXT: fmov d3, x10 +; CHECK-SD-FP16-NEXT: mov v2.d[1], x13 +; CHECK-SD-FP16-NEXT: mov v1.d[1], x15 +; CHECK-SD-FP16-NEXT: mov v3.d[1], x14 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v8f16_v8i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: 
fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 +; CHECK-GI-NOFP16-NEXT: fcvtzs x9, s0 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 +; CHECK-GI-NOFP16-NEXT: fcvtzs x8, s1 +; CHECK-GI-NOFP16-NEXT: fcvtzs x12, s4 +; CHECK-GI-NOFP16-NEXT: fcvtzs x11, s3 +; CHECK-GI-NOFP16-NEXT: fcvtzs x15, s7 +; CHECK-GI-NOFP16-NEXT: fmov d0, x9 +; CHECK-GI-NOFP16-NEXT: fcvtzs x10, s2 +; CHECK-GI-NOFP16-NEXT: fcvtzs x13, s5 +; CHECK-GI-NOFP16-NEXT: fcvtzs x14, s6 +; CHECK-GI-NOFP16-NEXT: fmov d2, x8 +; CHECK-GI-NOFP16-NEXT: fmov d1, x12 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-GI-NOFP16-NEXT: fmov d3, x10 +; CHECK-GI-NOFP16-NEXT: mov v2.d[1], x13 +; CHECK-GI-NOFP16-NEXT: mov v1.d[1], x15 +; CHECK-GI-NOFP16-NEXT: mov v3.d[1], x14 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v8f16_v8i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-GI-FP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[3] +; CHECK-GI-FP16-NEXT: fcvtzs x9, h0 +; CHECK-GI-FP16-NEXT: mov h2, v1.h[2] +; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] +; CHECK-GI-FP16-NEXT: mov h6, v1.h[3] +; CHECK-GI-FP16-NEXT: fcvtzs x8, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x12, h4 +; CHECK-GI-FP16-NEXT: fcvtzs x11, h3 +; CHECK-GI-FP16-NEXT: fcvtzs x15, h7 +; CHECK-GI-FP16-NEXT: fmov d0, x9 +; CHECK-GI-FP16-NEXT: fcvtzs x10, h2 +; CHECK-GI-FP16-NEXT: fcvtzs x13, h5 +; CHECK-GI-FP16-NEXT: fcvtzs x14, h6 +; CHECK-GI-FP16-NEXT: fmov d2, x8 +; CHECK-GI-FP16-NEXT: fmov d1, x12 +; CHECK-GI-FP16-NEXT: mov v0.d[1], x11 +; CHECK-GI-FP16-NEXT: fmov d3, x10 +; CHECK-GI-FP16-NEXT: mov v2.d[1], x13 +; CHECK-GI-FP16-NEXT: mov v1.d[1], x15 +; CHECK-GI-FP16-NEXT: mov v3.d[1], x14 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <8 x half> %a to <8 x i64> + ret <8 x i64> %c +} + +define <8 x i64> @fptou_v8f16_v8i64(<8 x half> %a) { +; 
CHECK-SD-NOFP16-LABEL: fptou_v8f16_v8i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[1] +; CHECK-SD-NOFP16-NEXT: mov h7, v0.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[2] +; CHECK-SD-NOFP16-NEXT: mov h5, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvt s3, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h7 +; CHECK-SD-NOFP16-NEXT: fcvtzu x9, s0 +; CHECK-SD-NOFP16-NEXT: fcvt s2, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: fcvtzu x8, s1 +; CHECK-SD-NOFP16-NEXT: fcvtzu x12, s4 +; CHECK-SD-NOFP16-NEXT: fcvtzu x11, s3 +; CHECK-SD-NOFP16-NEXT: fcvtzu x15, s7 +; CHECK-SD-NOFP16-NEXT: fmov d0, x9 +; CHECK-SD-NOFP16-NEXT: fcvtzu x10, s2 +; CHECK-SD-NOFP16-NEXT: fcvtzu x13, s5 +; CHECK-SD-NOFP16-NEXT: fcvtzu x14, s6 +; CHECK-SD-NOFP16-NEXT: fmov d2, x8 +; CHECK-SD-NOFP16-NEXT: fmov d1, x12 +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-SD-NOFP16-NEXT: fmov d3, x10 +; CHECK-SD-NOFP16-NEXT: mov v2.d[1], x13 +; CHECK-SD-NOFP16-NEXT: mov v1.d[1], x15 +; CHECK-SD-NOFP16-NEXT: mov v3.d[1], x14 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v8f16_v8i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-FP16-NEXT: mov h4, v0.h[2] +; CHECK-SD-FP16-NEXT: mov h3, v0.h[1] +; CHECK-SD-FP16-NEXT: mov h7, v0.h[3] +; CHECK-SD-FP16-NEXT: fcvtzu x9, h0 +; CHECK-SD-FP16-NEXT: mov h2, v1.h[2] +; CHECK-SD-FP16-NEXT: mov h5, v1.h[1] +; CHECK-SD-FP16-NEXT: mov h6, v1.h[3] +; CHECK-SD-FP16-NEXT: fcvtzu x8, h1 +; CHECK-SD-FP16-NEXT: fcvtzu x12, h4 +; CHECK-SD-FP16-NEXT: fcvtzu x11, h3 +; CHECK-SD-FP16-NEXT: fcvtzu x15, h7 +; CHECK-SD-FP16-NEXT: fmov d0, x9 +; CHECK-SD-FP16-NEXT: fcvtzu x10, h2 +; 
CHECK-SD-FP16-NEXT: fcvtzu x13, h5 +; CHECK-SD-FP16-NEXT: fcvtzu x14, h6 +; CHECK-SD-FP16-NEXT: fmov d2, x8 +; CHECK-SD-FP16-NEXT: fmov d1, x12 +; CHECK-SD-FP16-NEXT: mov v0.d[1], x11 +; CHECK-SD-FP16-NEXT: fmov d3, x10 +; CHECK-SD-FP16-NEXT: mov v2.d[1], x13 +; CHECK-SD-FP16-NEXT: mov v1.d[1], x15 +; CHECK-SD-FP16-NEXT: mov v3.d[1], x14 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v8f16_v8i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 +; CHECK-GI-NOFP16-NEXT: fcvtzu x9, s0 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 +; CHECK-GI-NOFP16-NEXT: fcvtzu x8, s1 +; CHECK-GI-NOFP16-NEXT: fcvtzu x12, s4 +; CHECK-GI-NOFP16-NEXT: fcvtzu x11, s3 +; CHECK-GI-NOFP16-NEXT: fcvtzu x15, s7 +; CHECK-GI-NOFP16-NEXT: fmov d0, x9 +; CHECK-GI-NOFP16-NEXT: fcvtzu x10, s2 +; CHECK-GI-NOFP16-NEXT: fcvtzu x13, s5 +; CHECK-GI-NOFP16-NEXT: fcvtzu x14, s6 +; CHECK-GI-NOFP16-NEXT: fmov d2, x8 +; CHECK-GI-NOFP16-NEXT: fmov d1, x12 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-GI-NOFP16-NEXT: fmov d3, x10 +; CHECK-GI-NOFP16-NEXT: mov v2.d[1], x13 +; CHECK-GI-NOFP16-NEXT: mov v1.d[1], x15 +; CHECK-GI-NOFP16-NEXT: mov v3.d[1], x14 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v8f16_v8i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-GI-FP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[3] +; CHECK-GI-FP16-NEXT: 
fcvtzu x9, h0 +; CHECK-GI-FP16-NEXT: mov h2, v1.h[2] +; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] +; CHECK-GI-FP16-NEXT: mov h6, v1.h[3] +; CHECK-GI-FP16-NEXT: fcvtzu x8, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x12, h4 +; CHECK-GI-FP16-NEXT: fcvtzu x11, h3 +; CHECK-GI-FP16-NEXT: fcvtzu x15, h7 +; CHECK-GI-FP16-NEXT: fmov d0, x9 +; CHECK-GI-FP16-NEXT: fcvtzu x10, h2 +; CHECK-GI-FP16-NEXT: fcvtzu x13, h5 +; CHECK-GI-FP16-NEXT: fcvtzu x14, h6 +; CHECK-GI-FP16-NEXT: fmov d2, x8 +; CHECK-GI-FP16-NEXT: fmov d1, x12 +; CHECK-GI-FP16-NEXT: mov v0.d[1], x11 +; CHECK-GI-FP16-NEXT: fmov d3, x10 +; CHECK-GI-FP16-NEXT: mov v2.d[1], x13 +; CHECK-GI-FP16-NEXT: mov v1.d[1], x15 +; CHECK-GI-FP16-NEXT: mov v3.d[1], x14 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <8 x half> %a to <8 x i64> + ret <8 x i64> %c +} + +define <16 x i64> @fptos_v16f16_v16i64(<16 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v16f16_v16i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NOFP16-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s5, h0 +; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h2 +; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h16, v2.h[2] +; CHECK-SD-NOFP16-NEXT: mov h17, v3.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvtzs x8, s5 +; CHECK-SD-NOFP16-NEXT: mov h5, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvtzs x9, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h16 +; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvtzs x10, s19 +; CHECK-SD-NOFP16-NEXT: mov h19, v3.h[1] +; CHECK-SD-NOFP16-NEXT: fcvtzs x11, s4 +; CHECK-SD-NOFP16-NEXT: mov 
h4, v1.h[3] +; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fcvtzs x13, s7 +; CHECK-SD-NOFP16-NEXT: fcvtzs x12, s6 +; CHECK-SD-NOFP16-NEXT: fcvtzs x15, s18 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h16 +; CHECK-SD-NOFP16-NEXT: fcvtzs x14, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h19 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmov d2, x9 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h3 +; CHECK-SD-NOFP16-NEXT: fcvtzs x9, s1 +; CHECK-SD-NOFP16-NEXT: fmov d6, x10 +; CHECK-SD-NOFP16-NEXT: fmov d3, x13 +; CHECK-SD-NOFP16-NEXT: fcvtzs x13, s0 +; CHECK-SD-NOFP16-NEXT: fcvtzs x16, s5 +; CHECK-SD-NOFP16-NEXT: fcvtzs x10, s7 +; CHECK-SD-NOFP16-NEXT: fmov d7, x14 +; CHECK-SD-NOFP16-NEXT: fcvtzs x14, s16 +; CHECK-SD-NOFP16-NEXT: fcvtzs x17, s17 +; CHECK-SD-NOFP16-NEXT: fcvtzs x0, s4 +; CHECK-SD-NOFP16-NEXT: fmov d0, x8 +; CHECK-SD-NOFP16-NEXT: fcvtzs x18, s19 +; CHECK-SD-NOFP16-NEXT: fmov d1, x15 +; CHECK-SD-NOFP16-NEXT: fmov d4, x9 +; CHECK-SD-NOFP16-NEXT: mov v2.d[1], x12 +; CHECK-SD-NOFP16-NEXT: fmov d5, x10 +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-SD-NOFP16-NEXT: mov v3.d[1], x14 +; CHECK-SD-NOFP16-NEXT: mov v1.d[1], x13 +; CHECK-SD-NOFP16-NEXT: mov v4.d[1], x16 +; CHECK-SD-NOFP16-NEXT: mov v6.d[1], x17 +; CHECK-SD-NOFP16-NEXT: mov v7.d[1], x18 +; CHECK-SD-NOFP16-NEXT: mov v5.d[1], x0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v16f16_v16i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-SD-FP16-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-SD-FP16-NEXT: mov h4, v0.h[1] +; CHECK-SD-FP16-NEXT: mov h5, v0.h[2] +; CHECK-SD-FP16-NEXT: fcvtzs x8, h0 +; CHECK-SD-FP16-NEXT: mov h0, v0.h[3] +; CHECK-SD-FP16-NEXT: fcvtzs x9, h1 +; CHECK-SD-FP16-NEXT: mov h7, v1.h[1] +; CHECK-SD-FP16-NEXT: mov h6, v2.h[2] +; CHECK-SD-FP16-NEXT: mov h16, v3.h[2] +; CHECK-SD-FP16-NEXT: fcvtzs 
x10, h4 +; CHECK-SD-FP16-NEXT: mov h4, v1.h[2] +; CHECK-SD-FP16-NEXT: fcvtzs x11, h2 +; CHECK-SD-FP16-NEXT: fcvtzs x12, h5 +; CHECK-SD-FP16-NEXT: mov h5, v2.h[1] +; CHECK-SD-FP16-NEXT: mov h17, v2.h[3] +; CHECK-SD-FP16-NEXT: fcvtzs x13, h3 +; CHECK-SD-FP16-NEXT: mov h18, v3.h[1] +; CHECK-SD-FP16-NEXT: mov h1, v1.h[3] +; CHECK-SD-FP16-NEXT: mov h19, v3.h[3] +; CHECK-SD-FP16-NEXT: fcvtzs x14, h6 +; CHECK-SD-FP16-NEXT: fcvtzs x15, h16 +; CHECK-SD-FP16-NEXT: fcvtzs x16, h0 +; CHECK-SD-FP16-NEXT: fcvtzs x0, h4 +; CHECK-SD-FP16-NEXT: fcvtzs x17, h7 +; CHECK-SD-FP16-NEXT: fmov d2, x11 +; CHECK-SD-FP16-NEXT: fcvtzs x11, h5 +; CHECK-SD-FP16-NEXT: fcvtzs x18, h17 +; CHECK-SD-FP16-NEXT: fmov d6, x13 +; CHECK-SD-FP16-NEXT: fcvtzs x13, h18 +; CHECK-SD-FP16-NEXT: fmov d0, x8 +; CHECK-SD-FP16-NEXT: fmov d4, x9 +; CHECK-SD-FP16-NEXT: fmov d3, x14 +; CHECK-SD-FP16-NEXT: fmov d7, x15 +; CHECK-SD-FP16-NEXT: fcvtzs x14, h19 +; CHECK-SD-FP16-NEXT: fcvtzs x15, h1 +; CHECK-SD-FP16-NEXT: fmov d1, x12 +; CHECK-SD-FP16-NEXT: fmov d5, x0 +; CHECK-SD-FP16-NEXT: mov v0.d[1], x10 +; CHECK-SD-FP16-NEXT: mov v4.d[1], x17 +; CHECK-SD-FP16-NEXT: mov v2.d[1], x11 +; CHECK-SD-FP16-NEXT: mov v3.d[1], x18 +; CHECK-SD-FP16-NEXT: mov v6.d[1], x13 +; CHECK-SD-FP16-NEXT: mov v1.d[1], x16 +; CHECK-SD-FP16-NEXT: mov v7.d[1], x14 +; CHECK-SD-FP16-NEXT: mov v5.d[1], x15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v16f16_v16i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NOFP16-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-NOFP16-NEXT: fcvt s5, h0 +; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s6, h2 +; CHECK-GI-NOFP16-NEXT: mov h7, v2.h[1] +; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[2] +; CHECK-GI-NOFP16-NEXT: mov h17, v3.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s19, h3 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: 
fcvtzs x8, s5 +; CHECK-GI-NOFP16-NEXT: mov h5, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvtzs x9, s6 +; CHECK-GI-NOFP16-NEXT: fcvt s6, h7 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h16 +; CHECK-GI-NOFP16-NEXT: mov h16, v1.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s17, h17 +; CHECK-GI-NOFP16-NEXT: fcvtzs x10, s19 +; CHECK-GI-NOFP16-NEXT: mov h19, v3.h[1] +; CHECK-GI-NOFP16-NEXT: fcvtzs x11, s4 +; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[3] +; CHECK-GI-NOFP16-NEXT: mov h3, v3.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: fcvtzs x13, s7 +; CHECK-GI-NOFP16-NEXT: fcvtzs x12, s6 +; CHECK-GI-NOFP16-NEXT: fcvtzs x15, s18 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h16 +; CHECK-GI-NOFP16-NEXT: fcvtzs x14, s17 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h2 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h19 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fmov d2, x9 +; CHECK-GI-NOFP16-NEXT: fcvt s19, h3 +; CHECK-GI-NOFP16-NEXT: fcvtzs x9, s1 +; CHECK-GI-NOFP16-NEXT: fmov d6, x10 +; CHECK-GI-NOFP16-NEXT: fmov d3, x13 +; CHECK-GI-NOFP16-NEXT: fcvtzs x13, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzs x16, s5 +; CHECK-GI-NOFP16-NEXT: fcvtzs x10, s7 +; CHECK-GI-NOFP16-NEXT: fmov d7, x14 +; CHECK-GI-NOFP16-NEXT: fcvtzs x14, s16 +; CHECK-GI-NOFP16-NEXT: fcvtzs x17, s17 +; CHECK-GI-NOFP16-NEXT: fcvtzs x0, s4 +; CHECK-GI-NOFP16-NEXT: fmov d0, x8 +; CHECK-GI-NOFP16-NEXT: fcvtzs x18, s19 +; CHECK-GI-NOFP16-NEXT: fmov d1, x15 +; CHECK-GI-NOFP16-NEXT: fmov d4, x9 +; CHECK-GI-NOFP16-NEXT: mov v2.d[1], x12 +; CHECK-GI-NOFP16-NEXT: fmov d5, x10 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-GI-NOFP16-NEXT: mov v3.d[1], x14 +; CHECK-GI-NOFP16-NEXT: mov v1.d[1], x13 +; CHECK-GI-NOFP16-NEXT: mov v4.d[1], x16 +; CHECK-GI-NOFP16-NEXT: mov v6.d[1], x17 +; CHECK-GI-NOFP16-NEXT: mov v7.d[1], x18 +; CHECK-GI-NOFP16-NEXT: mov v5.d[1], x0 +; CHECK-GI-NOFP16-NEXT: ret +; +; 
CHECK-GI-FP16-LABEL: fptos_v16f16_v16i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-GI-FP16-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[2] +; CHECK-GI-FP16-NEXT: fcvtzs x8, h0 +; CHECK-GI-FP16-NEXT: mov h0, v0.h[3] +; CHECK-GI-FP16-NEXT: fcvtzs x9, h1 +; CHECK-GI-FP16-NEXT: mov h7, v1.h[1] +; CHECK-GI-FP16-NEXT: mov h6, v2.h[2] +; CHECK-GI-FP16-NEXT: mov h16, v3.h[2] +; CHECK-GI-FP16-NEXT: fcvtzs x10, h4 +; CHECK-GI-FP16-NEXT: mov h4, v1.h[2] +; CHECK-GI-FP16-NEXT: fcvtzs x11, h2 +; CHECK-GI-FP16-NEXT: fcvtzs x12, h5 +; CHECK-GI-FP16-NEXT: mov h5, v2.h[1] +; CHECK-GI-FP16-NEXT: mov h17, v2.h[3] +; CHECK-GI-FP16-NEXT: fcvtzs x13, h3 +; CHECK-GI-FP16-NEXT: mov h18, v3.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v1.h[3] +; CHECK-GI-FP16-NEXT: mov h19, v3.h[3] +; CHECK-GI-FP16-NEXT: fcvtzs x14, h6 +; CHECK-GI-FP16-NEXT: fcvtzs x15, h16 +; CHECK-GI-FP16-NEXT: fcvtzs x16, h0 +; CHECK-GI-FP16-NEXT: fcvtzs x0, h4 +; CHECK-GI-FP16-NEXT: fcvtzs x17, h7 +; CHECK-GI-FP16-NEXT: fmov d2, x11 +; CHECK-GI-FP16-NEXT: fcvtzs x11, h5 +; CHECK-GI-FP16-NEXT: fcvtzs x18, h17 +; CHECK-GI-FP16-NEXT: fmov d6, x13 +; CHECK-GI-FP16-NEXT: fcvtzs x13, h18 +; CHECK-GI-FP16-NEXT: fmov d0, x8 +; CHECK-GI-FP16-NEXT: fmov d4, x9 +; CHECK-GI-FP16-NEXT: fmov d3, x14 +; CHECK-GI-FP16-NEXT: fmov d7, x15 +; CHECK-GI-FP16-NEXT: fcvtzs x14, h19 +; CHECK-GI-FP16-NEXT: fcvtzs x15, h1 +; CHECK-GI-FP16-NEXT: fmov d1, x12 +; CHECK-GI-FP16-NEXT: fmov d5, x0 +; CHECK-GI-FP16-NEXT: mov v0.d[1], x10 +; CHECK-GI-FP16-NEXT: mov v4.d[1], x17 +; CHECK-GI-FP16-NEXT: mov v2.d[1], x11 +; CHECK-GI-FP16-NEXT: mov v3.d[1], x18 +; CHECK-GI-FP16-NEXT: mov v6.d[1], x13 +; CHECK-GI-FP16-NEXT: mov v1.d[1], x16 +; CHECK-GI-FP16-NEXT: mov v7.d[1], x14 +; CHECK-GI-FP16-NEXT: mov v5.d[1], x15 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <16 x half> %a to <16 x i64> + ret <16 x i64> %c +} + +define <16 x i64> 
@fptou_v16f16_v16i64(<16 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v16f16_v16i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NOFP16-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s5, h0 +; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[2] +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h2 +; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h16, v2.h[2] +; CHECK-SD-NOFP16-NEXT: mov h17, v3.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvtzu x8, s5 +; CHECK-SD-NOFP16-NEXT: mov h5, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvtzu x9, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h7 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h16 +; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvtzu x10, s19 +; CHECK-SD-NOFP16-NEXT: mov h19, v3.h[1] +; CHECK-SD-NOFP16-NEXT: fcvtzu x11, s4 +; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[3] +; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fcvtzu x13, s7 +; CHECK-SD-NOFP16-NEXT: fcvtzu x12, s6 +; CHECK-SD-NOFP16-NEXT: fcvtzu x15, s18 +; CHECK-SD-NOFP16-NEXT: fcvt s7, h16 +; CHECK-SD-NOFP16-NEXT: fcvtzu x14, s17 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h2 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h19 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fmov d2, x9 +; CHECK-SD-NOFP16-NEXT: fcvt s19, h3 +; CHECK-SD-NOFP16-NEXT: fcvtzu x9, s1 +; CHECK-SD-NOFP16-NEXT: fmov d6, x10 +; CHECK-SD-NOFP16-NEXT: fmov d3, x13 +; CHECK-SD-NOFP16-NEXT: fcvtzu x13, s0 +; CHECK-SD-NOFP16-NEXT: fcvtzu x16, s5 +; CHECK-SD-NOFP16-NEXT: fcvtzu x10, s7 +; CHECK-SD-NOFP16-NEXT: fmov d7, x14 +; CHECK-SD-NOFP16-NEXT: 
fcvtzu x14, s16 +; CHECK-SD-NOFP16-NEXT: fcvtzu x17, s17 +; CHECK-SD-NOFP16-NEXT: fcvtzu x0, s4 +; CHECK-SD-NOFP16-NEXT: fmov d0, x8 +; CHECK-SD-NOFP16-NEXT: fcvtzu x18, s19 +; CHECK-SD-NOFP16-NEXT: fmov d1, x15 +; CHECK-SD-NOFP16-NEXT: fmov d4, x9 +; CHECK-SD-NOFP16-NEXT: mov v2.d[1], x12 +; CHECK-SD-NOFP16-NEXT: fmov d5, x10 +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-SD-NOFP16-NEXT: mov v3.d[1], x14 +; CHECK-SD-NOFP16-NEXT: mov v1.d[1], x13 +; CHECK-SD-NOFP16-NEXT: mov v4.d[1], x16 +; CHECK-SD-NOFP16-NEXT: mov v6.d[1], x17 +; CHECK-SD-NOFP16-NEXT: mov v7.d[1], x18 +; CHECK-SD-NOFP16-NEXT: mov v5.d[1], x0 +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v16f16_v16i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-SD-FP16-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-SD-FP16-NEXT: mov h4, v0.h[1] +; CHECK-SD-FP16-NEXT: mov h5, v0.h[2] +; CHECK-SD-FP16-NEXT: fcvtzu x8, h0 +; CHECK-SD-FP16-NEXT: mov h0, v0.h[3] +; CHECK-SD-FP16-NEXT: fcvtzu x9, h1 +; CHECK-SD-FP16-NEXT: mov h7, v1.h[1] +; CHECK-SD-FP16-NEXT: mov h6, v2.h[2] +; CHECK-SD-FP16-NEXT: mov h16, v3.h[2] +; CHECK-SD-FP16-NEXT: fcvtzu x10, h4 +; CHECK-SD-FP16-NEXT: mov h4, v1.h[2] +; CHECK-SD-FP16-NEXT: fcvtzu x11, h2 +; CHECK-SD-FP16-NEXT: fcvtzu x12, h5 +; CHECK-SD-FP16-NEXT: mov h5, v2.h[1] +; CHECK-SD-FP16-NEXT: mov h17, v2.h[3] +; CHECK-SD-FP16-NEXT: fcvtzu x13, h3 +; CHECK-SD-FP16-NEXT: mov h18, v3.h[1] +; CHECK-SD-FP16-NEXT: mov h1, v1.h[3] +; CHECK-SD-FP16-NEXT: mov h19, v3.h[3] +; CHECK-SD-FP16-NEXT: fcvtzu x14, h6 +; CHECK-SD-FP16-NEXT: fcvtzu x15, h16 +; CHECK-SD-FP16-NEXT: fcvtzu x16, h0 +; CHECK-SD-FP16-NEXT: fcvtzu x0, h4 +; CHECK-SD-FP16-NEXT: fcvtzu x17, h7 +; CHECK-SD-FP16-NEXT: fmov d2, x11 +; CHECK-SD-FP16-NEXT: fcvtzu x11, h5 +; CHECK-SD-FP16-NEXT: fcvtzu x18, h17 +; CHECK-SD-FP16-NEXT: fmov d6, x13 +; CHECK-SD-FP16-NEXT: fcvtzu x13, h18 +; CHECK-SD-FP16-NEXT: fmov d0, x8 +; CHECK-SD-FP16-NEXT: fmov d4, x9 +; 
CHECK-SD-FP16-NEXT: fmov d3, x14 +; CHECK-SD-FP16-NEXT: fmov d7, x15 +; CHECK-SD-FP16-NEXT: fcvtzu x14, h19 +; CHECK-SD-FP16-NEXT: fcvtzu x15, h1 +; CHECK-SD-FP16-NEXT: fmov d1, x12 +; CHECK-SD-FP16-NEXT: fmov d5, x0 +; CHECK-SD-FP16-NEXT: mov v0.d[1], x10 +; CHECK-SD-FP16-NEXT: mov v4.d[1], x17 +; CHECK-SD-FP16-NEXT: mov v2.d[1], x11 +; CHECK-SD-FP16-NEXT: mov v3.d[1], x18 +; CHECK-SD-FP16-NEXT: mov v6.d[1], x13 +; CHECK-SD-FP16-NEXT: mov v1.d[1], x16 +; CHECK-SD-FP16-NEXT: mov v7.d[1], x14 +; CHECK-SD-FP16-NEXT: mov v5.d[1], x15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v16f16_v16i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NOFP16-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-NOFP16-NEXT: fcvt s5, h0 +; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s6, h2 +; CHECK-GI-NOFP16-NEXT: mov h7, v2.h[1] +; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[2] +; CHECK-GI-NOFP16-NEXT: mov h17, v3.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s19, h3 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fcvtzu x8, s5 +; CHECK-GI-NOFP16-NEXT: mov h5, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvtzu x9, s6 +; CHECK-GI-NOFP16-NEXT: fcvt s6, h7 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h16 +; CHECK-GI-NOFP16-NEXT: mov h16, v1.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s17, h17 +; CHECK-GI-NOFP16-NEXT: fcvtzu x10, s19 +; CHECK-GI-NOFP16-NEXT: mov h19, v3.h[1] +; CHECK-GI-NOFP16-NEXT: fcvtzu x11, s4 +; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[3] +; CHECK-GI-NOFP16-NEXT: mov h3, v3.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: fcvtzu x13, s7 +; CHECK-GI-NOFP16-NEXT: fcvtzu x12, s6 +; CHECK-GI-NOFP16-NEXT: fcvtzu x15, s18 +; CHECK-GI-NOFP16-NEXT: fcvt s7, h16 +; 
CHECK-GI-NOFP16-NEXT: fcvtzu x14, s17 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h2 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h19 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fmov d2, x9 +; CHECK-GI-NOFP16-NEXT: fcvt s19, h3 +; CHECK-GI-NOFP16-NEXT: fcvtzu x9, s1 +; CHECK-GI-NOFP16-NEXT: fmov d6, x10 +; CHECK-GI-NOFP16-NEXT: fmov d3, x13 +; CHECK-GI-NOFP16-NEXT: fcvtzu x13, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzu x16, s5 +; CHECK-GI-NOFP16-NEXT: fcvtzu x10, s7 +; CHECK-GI-NOFP16-NEXT: fmov d7, x14 +; CHECK-GI-NOFP16-NEXT: fcvtzu x14, s16 +; CHECK-GI-NOFP16-NEXT: fcvtzu x17, s17 +; CHECK-GI-NOFP16-NEXT: fcvtzu x0, s4 +; CHECK-GI-NOFP16-NEXT: fmov d0, x8 +; CHECK-GI-NOFP16-NEXT: fcvtzu x18, s19 +; CHECK-GI-NOFP16-NEXT: fmov d1, x15 +; CHECK-GI-NOFP16-NEXT: fmov d4, x9 +; CHECK-GI-NOFP16-NEXT: mov v2.d[1], x12 +; CHECK-GI-NOFP16-NEXT: fmov d5, x10 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-GI-NOFP16-NEXT: mov v3.d[1], x14 +; CHECK-GI-NOFP16-NEXT: mov v1.d[1], x13 +; CHECK-GI-NOFP16-NEXT: mov v4.d[1], x16 +; CHECK-GI-NOFP16-NEXT: mov v6.d[1], x17 +; CHECK-GI-NOFP16-NEXT: mov v7.d[1], x18 +; CHECK-GI-NOFP16-NEXT: mov v5.d[1], x0 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v16f16_v16i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-GI-FP16-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[2] +; CHECK-GI-FP16-NEXT: fcvtzu x8, h0 +; CHECK-GI-FP16-NEXT: mov h0, v0.h[3] +; CHECK-GI-FP16-NEXT: fcvtzu x9, h1 +; CHECK-GI-FP16-NEXT: mov h7, v1.h[1] +; CHECK-GI-FP16-NEXT: mov h6, v2.h[2] +; CHECK-GI-FP16-NEXT: mov h16, v3.h[2] +; CHECK-GI-FP16-NEXT: fcvtzu x10, h4 +; CHECK-GI-FP16-NEXT: mov h4, v1.h[2] +; CHECK-GI-FP16-NEXT: fcvtzu x11, h2 +; CHECK-GI-FP16-NEXT: fcvtzu x12, h5 +; CHECK-GI-FP16-NEXT: mov h5, v2.h[1] +; CHECK-GI-FP16-NEXT: mov h17, v2.h[3] +; CHECK-GI-FP16-NEXT: fcvtzu x13, h3 +; CHECK-GI-FP16-NEXT: mov h18, v3.h[1] +; 
CHECK-GI-FP16-NEXT: mov h1, v1.h[3] +; CHECK-GI-FP16-NEXT: mov h19, v3.h[3] +; CHECK-GI-FP16-NEXT: fcvtzu x14, h6 +; CHECK-GI-FP16-NEXT: fcvtzu x15, h16 +; CHECK-GI-FP16-NEXT: fcvtzu x16, h0 +; CHECK-GI-FP16-NEXT: fcvtzu x0, h4 +; CHECK-GI-FP16-NEXT: fcvtzu x17, h7 +; CHECK-GI-FP16-NEXT: fmov d2, x11 +; CHECK-GI-FP16-NEXT: fcvtzu x11, h5 +; CHECK-GI-FP16-NEXT: fcvtzu x18, h17 +; CHECK-GI-FP16-NEXT: fmov d6, x13 +; CHECK-GI-FP16-NEXT: fcvtzu x13, h18 +; CHECK-GI-FP16-NEXT: fmov d0, x8 +; CHECK-GI-FP16-NEXT: fmov d4, x9 +; CHECK-GI-FP16-NEXT: fmov d3, x14 +; CHECK-GI-FP16-NEXT: fmov d7, x15 +; CHECK-GI-FP16-NEXT: fcvtzu x14, h19 +; CHECK-GI-FP16-NEXT: fcvtzu x15, h1 +; CHECK-GI-FP16-NEXT: fmov d1, x12 +; CHECK-GI-FP16-NEXT: fmov d5, x0 +; CHECK-GI-FP16-NEXT: mov v0.d[1], x10 +; CHECK-GI-FP16-NEXT: mov v4.d[1], x17 +; CHECK-GI-FP16-NEXT: mov v2.d[1], x11 +; CHECK-GI-FP16-NEXT: mov v3.d[1], x18 +; CHECK-GI-FP16-NEXT: mov v6.d[1], x13 +; CHECK-GI-FP16-NEXT: mov v1.d[1], x16 +; CHECK-GI-FP16-NEXT: mov v7.d[1], x14 +; CHECK-GI-FP16-NEXT: mov v5.d[1], x15 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <16 x half> %a to <16 x i64> + ret <16 x i64> %c +} + +define <32 x i64> @fptos_v32f16_v32i64(<32 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v32f16_v32i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-SD-NOFP16-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-SD-NOFP16-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-SD-NOFP16-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h2 +; CHECK-SD-NOFP16-NEXT: mov h26, v2.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h0 +; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[2] +; CHECK-SD-NOFP16-NEXT: mov h20, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h16, v4.h[2] +; CHECK-SD-NOFP16-NEXT: mov h17, v5.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s23, h5 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h6 +; 
CHECK-SD-NOFP16-NEXT: mov h25, v6.h[2] +; CHECK-SD-NOFP16-NEXT: fcvtzs x9, s21 +; CHECK-SD-NOFP16-NEXT: fcvtzs x11, s22 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h7 +; CHECK-SD-NOFP16-NEXT: mov h21, v3.h[3] +; CHECK-SD-NOFP16-NEXT: fcvtzs x10, s19 +; CHECK-SD-NOFP16-NEXT: fcvt s27, h27 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvtzs x12, s23 +; CHECK-SD-NOFP16-NEXT: fcvtzs x13, s24 +; CHECK-SD-NOFP16-NEXT: fcvt s23, h25 +; CHECK-SD-NOFP16-NEXT: fcvt s25, h26 +; CHECK-SD-NOFP16-NEXT: mov h26, v3.h[1] +; CHECK-SD-NOFP16-NEXT: mov h24, v2.h[3] +; CHECK-SD-NOFP16-NEXT: fmov d19, x9 +; CHECK-SD-NOFP16-NEXT: fcvtzs x9, s22 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h21 +; CHECK-SD-NOFP16-NEXT: fcvtzs x14, s16 +; CHECK-SD-NOFP16-NEXT: fcvtzs x15, s17 +; CHECK-SD-NOFP16-NEXT: fmov d2, x12 +; CHECK-SD-NOFP16-NEXT: fmov d16, x13 +; CHECK-SD-NOFP16-NEXT: fcvtzs x12, s23 +; CHECK-SD-NOFP16-NEXT: fcvtzs x13, s25 +; CHECK-SD-NOFP16-NEXT: mov h23, v1.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s25, h26 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h24 +; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[3] +; CHECK-SD-NOFP16-NEXT: fmov d26, x11 +; CHECK-SD-NOFP16-NEXT: fcvtzs x11, s21 +; CHECK-SD-NOFP16-NEXT: fmov d3, x14 +; CHECK-SD-NOFP16-NEXT: fmov d17, x15 +; CHECK-SD-NOFP16-NEXT: fcvtzs x14, s22 +; CHECK-SD-NOFP16-NEXT: fcvtzs x15, s27 +; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h23 +; CHECK-SD-NOFP16-NEXT: fmov d23, x13 +; CHECK-SD-NOFP16-NEXT: fcvtzs x13, s25 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fmov d25, x14 +; CHECK-SD-NOFP16-NEXT: fcvtzs x14, s24 +; CHECK-SD-NOFP16-NEXT: fmov d24, x15 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h22 +; CHECK-SD-NOFP16-NEXT: fcvtzs x15, s18 +; CHECK-SD-NOFP16-NEXT: mov h18, v7.h[1] +; CHECK-SD-NOFP16-NEXT: mov v25.d[1], x13 +; CHECK-SD-NOFP16-NEXT: fcvtzs x13, s21 
+; CHECK-SD-NOFP16-NEXT: mov h21, v7.h[2] +; CHECK-SD-NOFP16-NEXT: mov v24.d[1], x11 +; CHECK-SD-NOFP16-NEXT: fcvtzs x11, s20 +; CHECK-SD-NOFP16-NEXT: mov h20, v0.h[1] +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[3] +; CHECK-SD-NOFP16-NEXT: mov v23.d[1], x14 +; CHECK-SD-NOFP16-NEXT: fcvtzs x14, s1 +; CHECK-SD-NOFP16-NEXT: mov h1, v6.h[3] +; CHECK-SD-NOFP16-NEXT: mov h6, v6.h[1] +; CHECK-SD-NOFP16-NEXT: mov v19.d[1], x15 +; CHECK-SD-NOFP16-NEXT: mov h7, v7.h[3] +; CHECK-SD-NOFP16-NEXT: stp q25, q24, [x8, #192] +; CHECK-SD-NOFP16-NEXT: fmov d24, x13 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: mov v26.d[1], x11 +; CHECK-SD-NOFP16-NEXT: fcvtzs x11, s22 +; CHECK-SD-NOFP16-NEXT: mov h22, v5.h[1] +; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: mov v24.d[1], x14 +; CHECK-SD-NOFP16-NEXT: mov h25, v4.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: stp q26, q23, [x8, #128] +; CHECK-SD-NOFP16-NEXT: fmov d23, x12 +; CHECK-SD-NOFP16-NEXT: fcvtzs x12, s20 +; CHECK-SD-NOFP16-NEXT: mov h20, v4.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fcvtzs x13, s0 +; CHECK-SD-NOFP16-NEXT: stp q19, q24, [x8, #64] +; CHECK-SD-NOFP16-NEXT: fcvt s22, h22 +; CHECK-SD-NOFP16-NEXT: fmov d0, x10 +; CHECK-SD-NOFP16-NEXT: fmov d19, x11 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvtzs x10, s1 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h25 +; CHECK-SD-NOFP16-NEXT: fcvtzs x11, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h7 +; CHECK-SD-NOFP16-NEXT: fcvtzs x14, s5 +; CHECK-SD-NOFP16-NEXT: mov v19.d[1], x13 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h18 +; CHECK-SD-NOFP16-NEXT: fcvtzs x13, s22 +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x12 +; CHECK-SD-NOFP16-NEXT: fcvtzs x12, s4 +; CHECK-SD-NOFP16-NEXT: mov v23.d[1], x10 +; CHECK-SD-NOFP16-NEXT: fcvtzs x10, s1 +; CHECK-SD-NOFP16-NEXT: fcvtzs x15, 
s24 +; CHECK-SD-NOFP16-NEXT: mov v16.d[1], x11 +; CHECK-SD-NOFP16-NEXT: fcvtzs x11, s20 +; CHECK-SD-NOFP16-NEXT: mov v17.d[1], x14 +; CHECK-SD-NOFP16-NEXT: fcvtzs x14, s6 +; CHECK-SD-NOFP16-NEXT: mov v2.d[1], x13 +; CHECK-SD-NOFP16-NEXT: fcvtzs x13, s5 +; CHECK-SD-NOFP16-NEXT: fmov d4, x9 +; CHECK-SD-NOFP16-NEXT: stp q0, q19, [x8] +; CHECK-SD-NOFP16-NEXT: fmov d0, x12 +; CHECK-SD-NOFP16-NEXT: stp q16, q23, [x8, #224] +; CHECK-SD-NOFP16-NEXT: fmov d1, x10 +; CHECK-SD-NOFP16-NEXT: mov v3.d[1], x15 +; CHECK-SD-NOFP16-NEXT: stp q2, q17, [x8, #160] +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-SD-NOFP16-NEXT: mov v4.d[1], x13 +; CHECK-SD-NOFP16-NEXT: mov v1.d[1], x14 +; CHECK-SD-NOFP16-NEXT: stp q0, q3, [x8, #96] +; CHECK-SD-NOFP16-NEXT: stp q4, q1, [x8, #32] +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v32f16_v32i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-SD-FP16-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-SD-FP16-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-SD-FP16-NEXT: mov h16, v3.h[2] +; CHECK-SD-FP16-NEXT: fcvtzs x9, h0 +; CHECK-SD-FP16-NEXT: mov h23, v3.h[3] +; CHECK-SD-FP16-NEXT: mov h25, v3.h[1] +; CHECK-SD-FP16-NEXT: fcvtzs x15, h3 +; CHECK-SD-FP16-NEXT: mov h24, v2.h[2] +; CHECK-SD-FP16-NEXT: mov h19, v1.h[2] +; CHECK-SD-FP16-NEXT: mov h21, v2.h[1] +; CHECK-SD-FP16-NEXT: mov h26, v2.h[3] +; CHECK-SD-FP16-NEXT: mov h17, v4.h[2] +; CHECK-SD-FP16-NEXT: mov h18, v5.h[2] +; CHECK-SD-FP16-NEXT: mov h22, v6.h[2] +; CHECK-SD-FP16-NEXT: fcvtzs x10, h5 +; CHECK-SD-FP16-NEXT: fcvtzs x12, h16 +; CHECK-SD-FP16-NEXT: fcvtzs x11, h6 +; CHECK-SD-FP16-NEXT: mov h7, v1.h[1] +; CHECK-SD-FP16-NEXT: mov h20, v1.h[3] +; CHECK-SD-FP16-NEXT: fcvtzs x13, h17 +; CHECK-SD-FP16-NEXT: fcvtzs x14, h18 +; CHECK-SD-FP16-NEXT: fmov d18, x9 +; CHECK-SD-FP16-NEXT: fcvtzs x9, h22 +; CHECK-SD-FP16-NEXT: fmov d3, x10 +; CHECK-SD-FP16-NEXT: fcvtzs x10, h23 +; CHECK-SD-FP16-NEXT: fmov d22, x12 +; 
CHECK-SD-FP16-NEXT: fcvtzs x12, h25 +; CHECK-SD-FP16-NEXT: fmov d23, x15 +; CHECK-SD-FP16-NEXT: fmov d16, x11 +; CHECK-SD-FP16-NEXT: fcvtzs x11, h2 +; CHECK-SD-FP16-NEXT: fcvtzs x15, h21 +; CHECK-SD-FP16-NEXT: fmov d2, x13 +; CHECK-SD-FP16-NEXT: fcvtzs x13, h24 +; CHECK-SD-FP16-NEXT: fmov d17, x14 +; CHECK-SD-FP16-NEXT: fcvtzs x14, h19 +; CHECK-SD-FP16-NEXT: mov v22.d[1], x10 +; CHECK-SD-FP16-NEXT: fcvtzs x10, h1 +; CHECK-SD-FP16-NEXT: mov v23.d[1], x12 +; CHECK-SD-FP16-NEXT: fmov d19, x9 +; CHECK-SD-FP16-NEXT: fcvtzs x9, h26 +; CHECK-SD-FP16-NEXT: fcvtzs x12, h20 +; CHECK-SD-FP16-NEXT: mov h20, v0.h[2] +; CHECK-SD-FP16-NEXT: fmov d21, x11 +; CHECK-SD-FP16-NEXT: fmov d1, x13 +; CHECK-SD-FP16-NEXT: fcvtzs x13, h7 +; CHECK-SD-FP16-NEXT: mov h24, v0.h[3] +; CHECK-SD-FP16-NEXT: fmov d7, x14 +; CHECK-SD-FP16-NEXT: stp q23, q22, [x8, #192] +; CHECK-SD-FP16-NEXT: fmov d22, x10 +; CHECK-SD-FP16-NEXT: mov v21.d[1], x15 +; CHECK-SD-FP16-NEXT: mov v1.d[1], x9 +; CHECK-SD-FP16-NEXT: mov h23, v0.h[1] +; CHECK-SD-FP16-NEXT: fcvtzs x9, h20 +; CHECK-SD-FP16-NEXT: mov v7.d[1], x12 +; CHECK-SD-FP16-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-SD-FP16-NEXT: mov h20, v6.h[3] +; CHECK-SD-FP16-NEXT: mov v22.d[1], x13 +; CHECK-SD-FP16-NEXT: mov h6, v6.h[1] +; CHECK-SD-FP16-NEXT: fcvtzs x10, h24 +; CHECK-SD-FP16-NEXT: stp q21, q1, [x8, #128] +; CHECK-SD-FP16-NEXT: mov h1, v5.h[1] +; CHECK-SD-FP16-NEXT: mov h5, v5.h[3] +; CHECK-SD-FP16-NEXT: fcvtzs x12, h20 +; CHECK-SD-FP16-NEXT: mov h20, v0.h[2] +; CHECK-SD-FP16-NEXT: fcvtzs x11, h0 +; CHECK-SD-FP16-NEXT: stp q22, q7, [x8, #64] +; CHECK-SD-FP16-NEXT: fmov d7, x9 +; CHECK-SD-FP16-NEXT: fcvtzs x9, h23 +; CHECK-SD-FP16-NEXT: mov h21, v4.h[3] +; CHECK-SD-FP16-NEXT: mov h22, v4.h[1] +; CHECK-SD-FP16-NEXT: fcvtzs x13, h6 +; CHECK-SD-FP16-NEXT: mov h6, v0.h[3] +; CHECK-SD-FP16-NEXT: fcvtzs x14, h5 +; CHECK-SD-FP16-NEXT: mov h0, v0.h[1] +; CHECK-SD-FP16-NEXT: mov v7.d[1], x10 +; CHECK-SD-FP16-NEXT: fcvtzs x10, h1 +; CHECK-SD-FP16-NEXT: mov 
v19.d[1], x12 +; CHECK-SD-FP16-NEXT: mov v18.d[1], x9 +; CHECK-SD-FP16-NEXT: fcvtzs x9, h4 +; CHECK-SD-FP16-NEXT: fcvtzs x12, h20 +; CHECK-SD-FP16-NEXT: fcvtzs x15, h21 +; CHECK-SD-FP16-NEXT: mov v16.d[1], x13 +; CHECK-SD-FP16-NEXT: fcvtzs x13, h22 +; CHECK-SD-FP16-NEXT: mov v17.d[1], x14 +; CHECK-SD-FP16-NEXT: fcvtzs x14, h6 +; CHECK-SD-FP16-NEXT: fmov d4, x11 +; CHECK-SD-FP16-NEXT: mov v3.d[1], x10 +; CHECK-SD-FP16-NEXT: fcvtzs x10, h0 +; CHECK-SD-FP16-NEXT: stp q18, q7, [x8] +; CHECK-SD-FP16-NEXT: fmov d0, x9 +; CHECK-SD-FP16-NEXT: fmov d1, x12 +; CHECK-SD-FP16-NEXT: stp q16, q19, [x8, #224] +; CHECK-SD-FP16-NEXT: mov v2.d[1], x15 +; CHECK-SD-FP16-NEXT: stp q3, q17, [x8, #160] +; CHECK-SD-FP16-NEXT: mov v0.d[1], x13 +; CHECK-SD-FP16-NEXT: mov v1.d[1], x14 +; CHECK-SD-FP16-NEXT: mov v4.d[1], x10 +; CHECK-SD-FP16-NEXT: stp q0, q2, [x8, #96] +; CHECK-SD-FP16-NEXT: stp q4, q1, [x8, #32] +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v32f16_v32i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-GI-NOFP16-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-GI-NOFP16-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-GI-NOFP16-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NOFP16-NEXT: fcvt s21, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s22, h2 +; CHECK-GI-NOFP16-NEXT: mov h26, v2.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s19, h0 +; CHECK-GI-NOFP16-NEXT: mov h27, v3.h[2] +; CHECK-GI-NOFP16-NEXT: mov h20, v2.h[1] +; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov h16, v4.h[2] +; CHECK-GI-NOFP16-NEXT: mov h17, v5.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s23, h5 +; CHECK-GI-NOFP16-NEXT: fcvt s24, h6 +; CHECK-GI-NOFP16-NEXT: mov h25, v6.h[2] +; CHECK-GI-NOFP16-NEXT: fcvtzs x9, s21 +; CHECK-GI-NOFP16-NEXT: fcvtzs x11, s22 +; CHECK-GI-NOFP16-NEXT: fcvt s22, h7 +; CHECK-GI-NOFP16-NEXT: mov h21, v3.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtzs x10, s19 +; CHECK-GI-NOFP16-NEXT: fcvt s27, h27 +; CHECK-GI-NOFP16-NEXT: fcvt s20, 
h20 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h16 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h17 +; CHECK-GI-NOFP16-NEXT: fcvtzs x12, s23 +; CHECK-GI-NOFP16-NEXT: fcvtzs x13, s24 +; CHECK-GI-NOFP16-NEXT: fcvt s23, h25 +; CHECK-GI-NOFP16-NEXT: fcvt s25, h26 +; CHECK-GI-NOFP16-NEXT: mov h26, v3.h[1] +; CHECK-GI-NOFP16-NEXT: mov h24, v2.h[3] +; CHECK-GI-NOFP16-NEXT: fmov d19, x9 +; CHECK-GI-NOFP16-NEXT: fcvtzs x9, s22 +; CHECK-GI-NOFP16-NEXT: fcvt s22, h3 +; CHECK-GI-NOFP16-NEXT: fcvt s21, h21 +; CHECK-GI-NOFP16-NEXT: fcvtzs x14, s16 +; CHECK-GI-NOFP16-NEXT: fcvtzs x15, s17 +; CHECK-GI-NOFP16-NEXT: fmov d2, x12 +; CHECK-GI-NOFP16-NEXT: fmov d16, x13 +; CHECK-GI-NOFP16-NEXT: fcvtzs x12, s23 +; CHECK-GI-NOFP16-NEXT: fcvtzs x13, s25 +; CHECK-GI-NOFP16-NEXT: mov h23, v1.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s25, h26 +; CHECK-GI-NOFP16-NEXT: fcvt s24, h24 +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[3] +; CHECK-GI-NOFP16-NEXT: fmov d26, x11 +; CHECK-GI-NOFP16-NEXT: fcvtzs x11, s21 +; CHECK-GI-NOFP16-NEXT: fmov d3, x14 +; CHECK-GI-NOFP16-NEXT: fmov d17, x15 +; CHECK-GI-NOFP16-NEXT: fcvtzs x14, s22 +; CHECK-GI-NOFP16-NEXT: fcvtzs x15, s27 +; CHECK-GI-NOFP16-NEXT: mov h22, v0.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 +; CHECK-GI-NOFP16-NEXT: fcvt s21, h23 +; CHECK-GI-NOFP16-NEXT: fmov d23, x13 +; CHECK-GI-NOFP16-NEXT: fcvtzs x13, s25 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fmov d25, x14 +; CHECK-GI-NOFP16-NEXT: fcvtzs x14, s24 +; CHECK-GI-NOFP16-NEXT: fmov d24, x15 +; CHECK-GI-NOFP16-NEXT: fcvt s22, h22 +; CHECK-GI-NOFP16-NEXT: fcvtzs x15, s18 +; CHECK-GI-NOFP16-NEXT: mov h18, v7.h[1] +; CHECK-GI-NOFP16-NEXT: mov v25.d[1], x13 +; CHECK-GI-NOFP16-NEXT: fcvtzs x13, s21 +; CHECK-GI-NOFP16-NEXT: mov h21, v7.h[2] +; CHECK-GI-NOFP16-NEXT: mov v24.d[1], x11 +; CHECK-GI-NOFP16-NEXT: fcvtzs x11, s20 +; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov v23.d[1], x14 +; CHECK-GI-NOFP16-NEXT: fcvtzs x14, s1 +; 
CHECK-GI-NOFP16-NEXT: mov h1, v6.h[3] +; CHECK-GI-NOFP16-NEXT: mov h6, v6.h[1] +; CHECK-GI-NOFP16-NEXT: mov v19.d[1], x15 +; CHECK-GI-NOFP16-NEXT: mov h7, v7.h[3] +; CHECK-GI-NOFP16-NEXT: stp q25, q24, [x8, #192] +; CHECK-GI-NOFP16-NEXT: fmov d24, x13 +; CHECK-GI-NOFP16-NEXT: fcvt s20, h20 +; CHECK-GI-NOFP16-NEXT: mov v26.d[1], x11 +; CHECK-GI-NOFP16-NEXT: fcvtzs x11, s22 +; CHECK-GI-NOFP16-NEXT: mov h22, v5.h[1] +; CHECK-GI-NOFP16-NEXT: mov h5, v5.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: mov v24.d[1], x14 +; CHECK-GI-NOFP16-NEXT: mov h25, v4.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 +; CHECK-GI-NOFP16-NEXT: stp q26, q23, [x8, #128] +; CHECK-GI-NOFP16-NEXT: fmov d23, x12 +; CHECK-GI-NOFP16-NEXT: fcvtzs x12, s20 +; CHECK-GI-NOFP16-NEXT: mov h20, v4.h[1] +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: fcvtzs x13, s0 +; CHECK-GI-NOFP16-NEXT: stp q19, q24, [x8, #64] +; CHECK-GI-NOFP16-NEXT: fcvt s22, h22 +; CHECK-GI-NOFP16-NEXT: fmov d0, x10 +; CHECK-GI-NOFP16-NEXT: fmov d19, x11 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fcvtzs x10, s1 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h21 +; CHECK-GI-NOFP16-NEXT: fcvt s24, h25 +; CHECK-GI-NOFP16-NEXT: fcvtzs x11, s6 +; CHECK-GI-NOFP16-NEXT: fcvt s20, h20 +; CHECK-GI-NOFP16-NEXT: fcvt s6, h7 +; CHECK-GI-NOFP16-NEXT: fcvtzs x14, s5 +; CHECK-GI-NOFP16-NEXT: mov v19.d[1], x13 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h18 +; CHECK-GI-NOFP16-NEXT: fcvtzs x13, s22 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x12 +; CHECK-GI-NOFP16-NEXT: fcvtzs x12, s4 +; CHECK-GI-NOFP16-NEXT: mov v23.d[1], x10 +; CHECK-GI-NOFP16-NEXT: fcvtzs x10, s1 +; CHECK-GI-NOFP16-NEXT: fcvtzs x15, s24 +; CHECK-GI-NOFP16-NEXT: mov v16.d[1], x11 +; CHECK-GI-NOFP16-NEXT: fcvtzs x11, s20 +; CHECK-GI-NOFP16-NEXT: mov v17.d[1], x14 +; CHECK-GI-NOFP16-NEXT: fcvtzs x14, s6 +; CHECK-GI-NOFP16-NEXT: mov v2.d[1], x13 +; CHECK-GI-NOFP16-NEXT: fcvtzs x13, s5 +; CHECK-GI-NOFP16-NEXT: fmov d4, x9 +; 
CHECK-GI-NOFP16-NEXT: stp q0, q19, [x8] +; CHECK-GI-NOFP16-NEXT: fmov d0, x12 +; CHECK-GI-NOFP16-NEXT: stp q16, q23, [x8, #224] +; CHECK-GI-NOFP16-NEXT: fmov d1, x10 +; CHECK-GI-NOFP16-NEXT: mov v3.d[1], x15 +; CHECK-GI-NOFP16-NEXT: stp q2, q17, [x8, #160] +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-GI-NOFP16-NEXT: mov v4.d[1], x13 +; CHECK-GI-NOFP16-NEXT: mov v1.d[1], x14 +; CHECK-GI-NOFP16-NEXT: stp q0, q3, [x8, #96] +; CHECK-GI-NOFP16-NEXT: stp q4, q1, [x8, #32] +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v32f16_v32i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-GI-FP16-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-GI-FP16-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-GI-FP16-NEXT: mov h16, v3.h[2] +; CHECK-GI-FP16-NEXT: fcvtzs x9, h0 +; CHECK-GI-FP16-NEXT: mov h23, v3.h[3] +; CHECK-GI-FP16-NEXT: mov h25, v3.h[1] +; CHECK-GI-FP16-NEXT: fcvtzs x15, h3 +; CHECK-GI-FP16-NEXT: mov h24, v2.h[2] +; CHECK-GI-FP16-NEXT: mov h19, v1.h[2] +; CHECK-GI-FP16-NEXT: mov h21, v2.h[1] +; CHECK-GI-FP16-NEXT: mov h26, v2.h[3] +; CHECK-GI-FP16-NEXT: mov h17, v4.h[2] +; CHECK-GI-FP16-NEXT: mov h18, v5.h[2] +; CHECK-GI-FP16-NEXT: mov h22, v6.h[2] +; CHECK-GI-FP16-NEXT: fcvtzs x10, h5 +; CHECK-GI-FP16-NEXT: fcvtzs x12, h16 +; CHECK-GI-FP16-NEXT: fcvtzs x11, h6 +; CHECK-GI-FP16-NEXT: mov h7, v1.h[1] +; CHECK-GI-FP16-NEXT: mov h20, v1.h[3] +; CHECK-GI-FP16-NEXT: fcvtzs x13, h17 +; CHECK-GI-FP16-NEXT: fcvtzs x14, h18 +; CHECK-GI-FP16-NEXT: fmov d18, x9 +; CHECK-GI-FP16-NEXT: fcvtzs x9, h22 +; CHECK-GI-FP16-NEXT: fmov d3, x10 +; CHECK-GI-FP16-NEXT: fcvtzs x10, h23 +; CHECK-GI-FP16-NEXT: fmov d22, x12 +; CHECK-GI-FP16-NEXT: fcvtzs x12, h25 +; CHECK-GI-FP16-NEXT: fmov d23, x15 +; CHECK-GI-FP16-NEXT: fmov d16, x11 +; CHECK-GI-FP16-NEXT: fcvtzs x11, h2 +; CHECK-GI-FP16-NEXT: fcvtzs x15, h21 +; CHECK-GI-FP16-NEXT: fmov d2, x13 +; CHECK-GI-FP16-NEXT: fcvtzs x13, h24 +; CHECK-GI-FP16-NEXT: fmov d17, x14 +; 
CHECK-GI-FP16-NEXT: fcvtzs x14, h19 +; CHECK-GI-FP16-NEXT: mov v22.d[1], x10 +; CHECK-GI-FP16-NEXT: fcvtzs x10, h1 +; CHECK-GI-FP16-NEXT: mov v23.d[1], x12 +; CHECK-GI-FP16-NEXT: fmov d19, x9 +; CHECK-GI-FP16-NEXT: fcvtzs x9, h26 +; CHECK-GI-FP16-NEXT: fcvtzs x12, h20 +; CHECK-GI-FP16-NEXT: mov h20, v0.h[2] +; CHECK-GI-FP16-NEXT: fmov d21, x11 +; CHECK-GI-FP16-NEXT: fmov d1, x13 +; CHECK-GI-FP16-NEXT: fcvtzs x13, h7 +; CHECK-GI-FP16-NEXT: mov h24, v0.h[3] +; CHECK-GI-FP16-NEXT: fmov d7, x14 +; CHECK-GI-FP16-NEXT: stp q23, q22, [x8, #192] +; CHECK-GI-FP16-NEXT: fmov d22, x10 +; CHECK-GI-FP16-NEXT: mov v21.d[1], x15 +; CHECK-GI-FP16-NEXT: mov v1.d[1], x9 +; CHECK-GI-FP16-NEXT: mov h23, v0.h[1] +; CHECK-GI-FP16-NEXT: fcvtzs x9, h20 +; CHECK-GI-FP16-NEXT: mov v7.d[1], x12 +; CHECK-GI-FP16-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-GI-FP16-NEXT: mov h20, v6.h[3] +; CHECK-GI-FP16-NEXT: mov v22.d[1], x13 +; CHECK-GI-FP16-NEXT: mov h6, v6.h[1] +; CHECK-GI-FP16-NEXT: fcvtzs x10, h24 +; CHECK-GI-FP16-NEXT: stp q21, q1, [x8, #128] +; CHECK-GI-FP16-NEXT: mov h1, v5.h[1] +; CHECK-GI-FP16-NEXT: mov h5, v5.h[3] +; CHECK-GI-FP16-NEXT: fcvtzs x12, h20 +; CHECK-GI-FP16-NEXT: mov h20, v0.h[2] +; CHECK-GI-FP16-NEXT: fcvtzs x11, h0 +; CHECK-GI-FP16-NEXT: stp q22, q7, [x8, #64] +; CHECK-GI-FP16-NEXT: fmov d7, x9 +; CHECK-GI-FP16-NEXT: fcvtzs x9, h23 +; CHECK-GI-FP16-NEXT: mov h21, v4.h[3] +; CHECK-GI-FP16-NEXT: mov h22, v4.h[1] +; CHECK-GI-FP16-NEXT: fcvtzs x13, h6 +; CHECK-GI-FP16-NEXT: mov h6, v0.h[3] +; CHECK-GI-FP16-NEXT: fcvtzs x14, h5 +; CHECK-GI-FP16-NEXT: mov h0, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v7.d[1], x10 +; CHECK-GI-FP16-NEXT: fcvtzs x10, h1 +; CHECK-GI-FP16-NEXT: mov v19.d[1], x12 +; CHECK-GI-FP16-NEXT: mov v18.d[1], x9 +; CHECK-GI-FP16-NEXT: fcvtzs x9, h4 +; CHECK-GI-FP16-NEXT: fcvtzs x12, h20 +; CHECK-GI-FP16-NEXT: fcvtzs x15, h21 +; CHECK-GI-FP16-NEXT: mov v16.d[1], x13 +; CHECK-GI-FP16-NEXT: fcvtzs x13, h22 +; CHECK-GI-FP16-NEXT: mov v17.d[1], x14 +; 
CHECK-GI-FP16-NEXT: fcvtzs x14, h6 +; CHECK-GI-FP16-NEXT: fmov d4, x11 +; CHECK-GI-FP16-NEXT: mov v3.d[1], x10 +; CHECK-GI-FP16-NEXT: fcvtzs x10, h0 +; CHECK-GI-FP16-NEXT: stp q18, q7, [x8] +; CHECK-GI-FP16-NEXT: fmov d0, x9 +; CHECK-GI-FP16-NEXT: fmov d1, x12 +; CHECK-GI-FP16-NEXT: stp q16, q19, [x8, #224] +; CHECK-GI-FP16-NEXT: mov v2.d[1], x15 +; CHECK-GI-FP16-NEXT: stp q3, q17, [x8, #160] +; CHECK-GI-FP16-NEXT: mov v0.d[1], x13 +; CHECK-GI-FP16-NEXT: mov v1.d[1], x14 +; CHECK-GI-FP16-NEXT: mov v4.d[1], x10 +; CHECK-GI-FP16-NEXT: stp q0, q2, [x8, #96] +; CHECK-GI-FP16-NEXT: stp q4, q1, [x8, #32] +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <32 x half> %a to <32 x i64> + ret <32 x i64> %c +} + +define <32 x i64> @fptou_v32f16_v32i64(<32 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v32f16_v32i64: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-SD-NOFP16-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-SD-NOFP16-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-SD-NOFP16-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h1 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h2 +; CHECK-SD-NOFP16-NEXT: mov h26, v2.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s19, h0 +; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[2] +; CHECK-SD-NOFP16-NEXT: mov h20, v2.h[1] +; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[1] +; CHECK-SD-NOFP16-NEXT: mov h16, v4.h[2] +; CHECK-SD-NOFP16-NEXT: mov h17, v5.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s23, h5 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h6 +; CHECK-SD-NOFP16-NEXT: mov h25, v6.h[2] +; CHECK-SD-NOFP16-NEXT: fcvtzu x9, s21 +; CHECK-SD-NOFP16-NEXT: fcvtzu x11, s22 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h7 +; CHECK-SD-NOFP16-NEXT: mov h21, v3.h[3] +; CHECK-SD-NOFP16-NEXT: fcvtzu x10, s19 +; CHECK-SD-NOFP16-NEXT: fcvt s27, h27 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s16, h16 +; CHECK-SD-NOFP16-NEXT: fcvt s17, h17 +; CHECK-SD-NOFP16-NEXT: fcvtzu x12, s23 +; CHECK-SD-NOFP16-NEXT: fcvtzu x13, 
s24 +; CHECK-SD-NOFP16-NEXT: fcvt s23, h25 +; CHECK-SD-NOFP16-NEXT: fcvt s25, h26 +; CHECK-SD-NOFP16-NEXT: mov h26, v3.h[1] +; CHECK-SD-NOFP16-NEXT: mov h24, v2.h[3] +; CHECK-SD-NOFP16-NEXT: fmov d19, x9 +; CHECK-SD-NOFP16-NEXT: fcvtzu x9, s22 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h3 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h21 +; CHECK-SD-NOFP16-NEXT: fcvtzu x14, s16 +; CHECK-SD-NOFP16-NEXT: fcvtzu x15, s17 +; CHECK-SD-NOFP16-NEXT: fmov d2, x12 +; CHECK-SD-NOFP16-NEXT: fmov d16, x13 +; CHECK-SD-NOFP16-NEXT: fcvtzu x12, s23 +; CHECK-SD-NOFP16-NEXT: fcvtzu x13, s25 +; CHECK-SD-NOFP16-NEXT: mov h23, v1.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s25, h26 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h24 +; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[3] +; CHECK-SD-NOFP16-NEXT: fmov d26, x11 +; CHECK-SD-NOFP16-NEXT: fcvtzu x11, s21 +; CHECK-SD-NOFP16-NEXT: fmov d3, x14 +; CHECK-SD-NOFP16-NEXT: fmov d17, x15 +; CHECK-SD-NOFP16-NEXT: fcvtzu x14, s22 +; CHECK-SD-NOFP16-NEXT: fcvtzu x15, s27 +; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[2] +; CHECK-SD-NOFP16-NEXT: fcvt s18, h18 +; CHECK-SD-NOFP16-NEXT: fcvt s21, h23 +; CHECK-SD-NOFP16-NEXT: fmov d23, x13 +; CHECK-SD-NOFP16-NEXT: fcvtzu x13, s25 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: fmov d25, x14 +; CHECK-SD-NOFP16-NEXT: fcvtzu x14, s24 +; CHECK-SD-NOFP16-NEXT: fmov d24, x15 +; CHECK-SD-NOFP16-NEXT: fcvt s22, h22 +; CHECK-SD-NOFP16-NEXT: fcvtzu x15, s18 +; CHECK-SD-NOFP16-NEXT: mov h18, v7.h[1] +; CHECK-SD-NOFP16-NEXT: mov v25.d[1], x13 +; CHECK-SD-NOFP16-NEXT: fcvtzu x13, s21 +; CHECK-SD-NOFP16-NEXT: mov h21, v7.h[2] +; CHECK-SD-NOFP16-NEXT: mov v24.d[1], x11 +; CHECK-SD-NOFP16-NEXT: fcvtzu x11, s20 +; CHECK-SD-NOFP16-NEXT: mov h20, v0.h[1] +; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[3] +; CHECK-SD-NOFP16-NEXT: mov v23.d[1], x14 +; CHECK-SD-NOFP16-NEXT: fcvtzu x14, s1 +; CHECK-SD-NOFP16-NEXT: mov h1, v6.h[3] +; CHECK-SD-NOFP16-NEXT: mov h6, v6.h[1] +; CHECK-SD-NOFP16-NEXT: mov v19.d[1], x15 +; CHECK-SD-NOFP16-NEXT: mov h7, v7.h[3] +; 
CHECK-SD-NOFP16-NEXT: stp q25, q24, [x8, #192] +; CHECK-SD-NOFP16-NEXT: fmov d24, x13 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: mov v26.d[1], x11 +; CHECK-SD-NOFP16-NEXT: fcvtzu x11, s22 +; CHECK-SD-NOFP16-NEXT: mov h22, v5.h[1] +; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s0, h0 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h1 +; CHECK-SD-NOFP16-NEXT: mov v24.d[1], x14 +; CHECK-SD-NOFP16-NEXT: mov h25, v4.h[3] +; CHECK-SD-NOFP16-NEXT: fcvt s6, h6 +; CHECK-SD-NOFP16-NEXT: stp q26, q23, [x8, #128] +; CHECK-SD-NOFP16-NEXT: fmov d23, x12 +; CHECK-SD-NOFP16-NEXT: fcvtzu x12, s20 +; CHECK-SD-NOFP16-NEXT: mov h20, v4.h[1] +; CHECK-SD-NOFP16-NEXT: fcvt s5, h5 +; CHECK-SD-NOFP16-NEXT: fcvtzu x13, s0 +; CHECK-SD-NOFP16-NEXT: stp q19, q24, [x8, #64] +; CHECK-SD-NOFP16-NEXT: fcvt s22, h22 +; CHECK-SD-NOFP16-NEXT: fmov d0, x10 +; CHECK-SD-NOFP16-NEXT: fmov d19, x11 +; CHECK-SD-NOFP16-NEXT: fcvt s4, h4 +; CHECK-SD-NOFP16-NEXT: fcvtzu x10, s1 +; CHECK-SD-NOFP16-NEXT: fcvt s1, h21 +; CHECK-SD-NOFP16-NEXT: fcvt s24, h25 +; CHECK-SD-NOFP16-NEXT: fcvtzu x11, s6 +; CHECK-SD-NOFP16-NEXT: fcvt s20, h20 +; CHECK-SD-NOFP16-NEXT: fcvt s6, h7 +; CHECK-SD-NOFP16-NEXT: fcvtzu x14, s5 +; CHECK-SD-NOFP16-NEXT: mov v19.d[1], x13 +; CHECK-SD-NOFP16-NEXT: fcvt s5, h18 +; CHECK-SD-NOFP16-NEXT: fcvtzu x13, s22 +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x12 +; CHECK-SD-NOFP16-NEXT: fcvtzu x12, s4 +; CHECK-SD-NOFP16-NEXT: mov v23.d[1], x10 +; CHECK-SD-NOFP16-NEXT: fcvtzu x10, s1 +; CHECK-SD-NOFP16-NEXT: fcvtzu x15, s24 +; CHECK-SD-NOFP16-NEXT: mov v16.d[1], x11 +; CHECK-SD-NOFP16-NEXT: fcvtzu x11, s20 +; CHECK-SD-NOFP16-NEXT: mov v17.d[1], x14 +; CHECK-SD-NOFP16-NEXT: fcvtzu x14, s6 +; CHECK-SD-NOFP16-NEXT: mov v2.d[1], x13 +; CHECK-SD-NOFP16-NEXT: fcvtzu x13, s5 +; CHECK-SD-NOFP16-NEXT: fmov d4, x9 +; CHECK-SD-NOFP16-NEXT: stp q0, q19, [x8] +; CHECK-SD-NOFP16-NEXT: fmov d0, x12 +; CHECK-SD-NOFP16-NEXT: stp q16, q23, [x8, #224] +; CHECK-SD-NOFP16-NEXT: fmov d1, x10 +; 
CHECK-SD-NOFP16-NEXT: mov v3.d[1], x15 +; CHECK-SD-NOFP16-NEXT: stp q2, q17, [x8, #160] +; CHECK-SD-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-SD-NOFP16-NEXT: mov v4.d[1], x13 +; CHECK-SD-NOFP16-NEXT: mov v1.d[1], x14 +; CHECK-SD-NOFP16-NEXT: stp q0, q3, [x8, #96] +; CHECK-SD-NOFP16-NEXT: stp q4, q1, [x8, #32] +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v32f16_v32i64: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-SD-FP16-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-SD-FP16-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-SD-FP16-NEXT: mov h16, v3.h[2] +; CHECK-SD-FP16-NEXT: fcvtzu x9, h0 +; CHECK-SD-FP16-NEXT: mov h23, v3.h[3] +; CHECK-SD-FP16-NEXT: mov h25, v3.h[1] +; CHECK-SD-FP16-NEXT: fcvtzu x15, h3 +; CHECK-SD-FP16-NEXT: mov h24, v2.h[2] +; CHECK-SD-FP16-NEXT: mov h19, v1.h[2] +; CHECK-SD-FP16-NEXT: mov h21, v2.h[1] +; CHECK-SD-FP16-NEXT: mov h26, v2.h[3] +; CHECK-SD-FP16-NEXT: mov h17, v4.h[2] +; CHECK-SD-FP16-NEXT: mov h18, v5.h[2] +; CHECK-SD-FP16-NEXT: mov h22, v6.h[2] +; CHECK-SD-FP16-NEXT: fcvtzu x10, h5 +; CHECK-SD-FP16-NEXT: fcvtzu x12, h16 +; CHECK-SD-FP16-NEXT: fcvtzu x11, h6 +; CHECK-SD-FP16-NEXT: mov h7, v1.h[1] +; CHECK-SD-FP16-NEXT: mov h20, v1.h[3] +; CHECK-SD-FP16-NEXT: fcvtzu x13, h17 +; CHECK-SD-FP16-NEXT: fcvtzu x14, h18 +; CHECK-SD-FP16-NEXT: fmov d18, x9 +; CHECK-SD-FP16-NEXT: fcvtzu x9, h22 +; CHECK-SD-FP16-NEXT: fmov d3, x10 +; CHECK-SD-FP16-NEXT: fcvtzu x10, h23 +; CHECK-SD-FP16-NEXT: fmov d22, x12 +; CHECK-SD-FP16-NEXT: fcvtzu x12, h25 +; CHECK-SD-FP16-NEXT: fmov d23, x15 +; CHECK-SD-FP16-NEXT: fmov d16, x11 +; CHECK-SD-FP16-NEXT: fcvtzu x11, h2 +; CHECK-SD-FP16-NEXT: fcvtzu x15, h21 +; CHECK-SD-FP16-NEXT: fmov d2, x13 +; CHECK-SD-FP16-NEXT: fcvtzu x13, h24 +; CHECK-SD-FP16-NEXT: fmov d17, x14 +; CHECK-SD-FP16-NEXT: fcvtzu x14, h19 +; CHECK-SD-FP16-NEXT: mov v22.d[1], x10 +; CHECK-SD-FP16-NEXT: fcvtzu x10, h1 +; CHECK-SD-FP16-NEXT: mov v23.d[1], x12 +; 
CHECK-SD-FP16-NEXT: fmov d19, x9 +; CHECK-SD-FP16-NEXT: fcvtzu x9, h26 +; CHECK-SD-FP16-NEXT: fcvtzu x12, h20 +; CHECK-SD-FP16-NEXT: mov h20, v0.h[2] +; CHECK-SD-FP16-NEXT: fmov d21, x11 +; CHECK-SD-FP16-NEXT: fmov d1, x13 +; CHECK-SD-FP16-NEXT: fcvtzu x13, h7 +; CHECK-SD-FP16-NEXT: mov h24, v0.h[3] +; CHECK-SD-FP16-NEXT: fmov d7, x14 +; CHECK-SD-FP16-NEXT: stp q23, q22, [x8, #192] +; CHECK-SD-FP16-NEXT: fmov d22, x10 +; CHECK-SD-FP16-NEXT: mov v21.d[1], x15 +; CHECK-SD-FP16-NEXT: mov v1.d[1], x9 +; CHECK-SD-FP16-NEXT: mov h23, v0.h[1] +; CHECK-SD-FP16-NEXT: fcvtzu x9, h20 +; CHECK-SD-FP16-NEXT: mov v7.d[1], x12 +; CHECK-SD-FP16-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-SD-FP16-NEXT: mov h20, v6.h[3] +; CHECK-SD-FP16-NEXT: mov v22.d[1], x13 +; CHECK-SD-FP16-NEXT: mov h6, v6.h[1] +; CHECK-SD-FP16-NEXT: fcvtzu x10, h24 +; CHECK-SD-FP16-NEXT: stp q21, q1, [x8, #128] +; CHECK-SD-FP16-NEXT: mov h1, v5.h[1] +; CHECK-SD-FP16-NEXT: mov h5, v5.h[3] +; CHECK-SD-FP16-NEXT: fcvtzu x12, h20 +; CHECK-SD-FP16-NEXT: mov h20, v0.h[2] +; CHECK-SD-FP16-NEXT: fcvtzu x11, h0 +; CHECK-SD-FP16-NEXT: stp q22, q7, [x8, #64] +; CHECK-SD-FP16-NEXT: fmov d7, x9 +; CHECK-SD-FP16-NEXT: fcvtzu x9, h23 +; CHECK-SD-FP16-NEXT: mov h21, v4.h[3] +; CHECK-SD-FP16-NEXT: mov h22, v4.h[1] +; CHECK-SD-FP16-NEXT: fcvtzu x13, h6 +; CHECK-SD-FP16-NEXT: mov h6, v0.h[3] +; CHECK-SD-FP16-NEXT: fcvtzu x14, h5 +; CHECK-SD-FP16-NEXT: mov h0, v0.h[1] +; CHECK-SD-FP16-NEXT: mov v7.d[1], x10 +; CHECK-SD-FP16-NEXT: fcvtzu x10, h1 +; CHECK-SD-FP16-NEXT: mov v19.d[1], x12 +; CHECK-SD-FP16-NEXT: mov v18.d[1], x9 +; CHECK-SD-FP16-NEXT: fcvtzu x9, h4 +; CHECK-SD-FP16-NEXT: fcvtzu x12, h20 +; CHECK-SD-FP16-NEXT: fcvtzu x15, h21 +; CHECK-SD-FP16-NEXT: mov v16.d[1], x13 +; CHECK-SD-FP16-NEXT: fcvtzu x13, h22 +; CHECK-SD-FP16-NEXT: mov v17.d[1], x14 +; CHECK-SD-FP16-NEXT: fcvtzu x14, h6 +; CHECK-SD-FP16-NEXT: fmov d4, x11 +; CHECK-SD-FP16-NEXT: mov v3.d[1], x10 +; CHECK-SD-FP16-NEXT: fcvtzu x10, h0 +; CHECK-SD-FP16-NEXT: 
stp q18, q7, [x8] +; CHECK-SD-FP16-NEXT: fmov d0, x9 +; CHECK-SD-FP16-NEXT: fmov d1, x12 +; CHECK-SD-FP16-NEXT: stp q16, q19, [x8, #224] +; CHECK-SD-FP16-NEXT: mov v2.d[1], x15 +; CHECK-SD-FP16-NEXT: stp q3, q17, [x8, #160] +; CHECK-SD-FP16-NEXT: mov v0.d[1], x13 +; CHECK-SD-FP16-NEXT: mov v1.d[1], x14 +; CHECK-SD-FP16-NEXT: mov v4.d[1], x10 +; CHECK-SD-FP16-NEXT: stp q0, q2, [x8, #96] +; CHECK-SD-FP16-NEXT: stp q4, q1, [x8, #32] +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v32f16_v32i64: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-GI-NOFP16-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-GI-NOFP16-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-GI-NOFP16-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NOFP16-NEXT: fcvt s21, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s22, h2 +; CHECK-GI-NOFP16-NEXT: mov h26, v2.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s19, h0 +; CHECK-GI-NOFP16-NEXT: mov h27, v3.h[2] +; CHECK-GI-NOFP16-NEXT: mov h20, v2.h[1] +; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov h16, v4.h[2] +; CHECK-GI-NOFP16-NEXT: mov h17, v5.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s23, h5 +; CHECK-GI-NOFP16-NEXT: fcvt s24, h6 +; CHECK-GI-NOFP16-NEXT: mov h25, v6.h[2] +; CHECK-GI-NOFP16-NEXT: fcvtzu x9, s21 +; CHECK-GI-NOFP16-NEXT: fcvtzu x11, s22 +; CHECK-GI-NOFP16-NEXT: fcvt s22, h7 +; CHECK-GI-NOFP16-NEXT: mov h21, v3.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtzu x10, s19 +; CHECK-GI-NOFP16-NEXT: fcvt s27, h27 +; CHECK-GI-NOFP16-NEXT: fcvt s20, h20 +; CHECK-GI-NOFP16-NEXT: fcvt s16, h16 +; CHECK-GI-NOFP16-NEXT: fcvt s17, h17 +; CHECK-GI-NOFP16-NEXT: fcvtzu x12, s23 +; CHECK-GI-NOFP16-NEXT: fcvtzu x13, s24 +; CHECK-GI-NOFP16-NEXT: fcvt s23, h25 +; CHECK-GI-NOFP16-NEXT: fcvt s25, h26 +; CHECK-GI-NOFP16-NEXT: mov h26, v3.h[1] +; CHECK-GI-NOFP16-NEXT: mov h24, v2.h[3] +; CHECK-GI-NOFP16-NEXT: fmov d19, x9 +; CHECK-GI-NOFP16-NEXT: fcvtzu x9, s22 +; CHECK-GI-NOFP16-NEXT: fcvt s22, h3 +; 
CHECK-GI-NOFP16-NEXT: fcvt s21, h21 +; CHECK-GI-NOFP16-NEXT: fcvtzu x14, s16 +; CHECK-GI-NOFP16-NEXT: fcvtzu x15, s17 +; CHECK-GI-NOFP16-NEXT: fmov d2, x12 +; CHECK-GI-NOFP16-NEXT: fmov d16, x13 +; CHECK-GI-NOFP16-NEXT: fcvtzu x12, s23 +; CHECK-GI-NOFP16-NEXT: fcvtzu x13, s25 +; CHECK-GI-NOFP16-NEXT: mov h23, v1.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s25, h26 +; CHECK-GI-NOFP16-NEXT: fcvt s24, h24 +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[3] +; CHECK-GI-NOFP16-NEXT: fmov d26, x11 +; CHECK-GI-NOFP16-NEXT: fcvtzu x11, s21 +; CHECK-GI-NOFP16-NEXT: fmov d3, x14 +; CHECK-GI-NOFP16-NEXT: fmov d17, x15 +; CHECK-GI-NOFP16-NEXT: fcvtzu x14, s22 +; CHECK-GI-NOFP16-NEXT: fcvtzu x15, s27 +; CHECK-GI-NOFP16-NEXT: mov h22, v0.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 +; CHECK-GI-NOFP16-NEXT: fcvt s21, h23 +; CHECK-GI-NOFP16-NEXT: fmov d23, x13 +; CHECK-GI-NOFP16-NEXT: fcvtzu x13, s25 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fmov d25, x14 +; CHECK-GI-NOFP16-NEXT: fcvtzu x14, s24 +; CHECK-GI-NOFP16-NEXT: fmov d24, x15 +; CHECK-GI-NOFP16-NEXT: fcvt s22, h22 +; CHECK-GI-NOFP16-NEXT: fcvtzu x15, s18 +; CHECK-GI-NOFP16-NEXT: mov h18, v7.h[1] +; CHECK-GI-NOFP16-NEXT: mov v25.d[1], x13 +; CHECK-GI-NOFP16-NEXT: fcvtzu x13, s21 +; CHECK-GI-NOFP16-NEXT: mov h21, v7.h[2] +; CHECK-GI-NOFP16-NEXT: mov v24.d[1], x11 +; CHECK-GI-NOFP16-NEXT: fcvtzu x11, s20 +; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov v23.d[1], x14 +; CHECK-GI-NOFP16-NEXT: fcvtzu x14, s1 +; CHECK-GI-NOFP16-NEXT: mov h1, v6.h[3] +; CHECK-GI-NOFP16-NEXT: mov h6, v6.h[1] +; CHECK-GI-NOFP16-NEXT: mov v19.d[1], x15 +; CHECK-GI-NOFP16-NEXT: mov h7, v7.h[3] +; CHECK-GI-NOFP16-NEXT: stp q25, q24, [x8, #192] +; CHECK-GI-NOFP16-NEXT: fmov d24, x13 +; CHECK-GI-NOFP16-NEXT: fcvt s20, h20 +; CHECK-GI-NOFP16-NEXT: mov v26.d[1], x11 +; CHECK-GI-NOFP16-NEXT: fcvtzu x11, s22 +; CHECK-GI-NOFP16-NEXT: mov h22, v5.h[1] +; CHECK-GI-NOFP16-NEXT: mov h5, v5.h[3] +; 
CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: mov v24.d[1], x14 +; CHECK-GI-NOFP16-NEXT: mov h25, v4.h[3] +; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 +; CHECK-GI-NOFP16-NEXT: stp q26, q23, [x8, #128] +; CHECK-GI-NOFP16-NEXT: fmov d23, x12 +; CHECK-GI-NOFP16-NEXT: fcvtzu x12, s20 +; CHECK-GI-NOFP16-NEXT: mov h20, v4.h[1] +; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 +; CHECK-GI-NOFP16-NEXT: fcvtzu x13, s0 +; CHECK-GI-NOFP16-NEXT: stp q19, q24, [x8, #64] +; CHECK-GI-NOFP16-NEXT: fcvt s22, h22 +; CHECK-GI-NOFP16-NEXT: fmov d0, x10 +; CHECK-GI-NOFP16-NEXT: fmov d19, x11 +; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 +; CHECK-GI-NOFP16-NEXT: fcvtzu x10, s1 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h21 +; CHECK-GI-NOFP16-NEXT: fcvt s24, h25 +; CHECK-GI-NOFP16-NEXT: fcvtzu x11, s6 +; CHECK-GI-NOFP16-NEXT: fcvt s20, h20 +; CHECK-GI-NOFP16-NEXT: fcvt s6, h7 +; CHECK-GI-NOFP16-NEXT: fcvtzu x14, s5 +; CHECK-GI-NOFP16-NEXT: mov v19.d[1], x13 +; CHECK-GI-NOFP16-NEXT: fcvt s5, h18 +; CHECK-GI-NOFP16-NEXT: fcvtzu x13, s22 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x12 +; CHECK-GI-NOFP16-NEXT: fcvtzu x12, s4 +; CHECK-GI-NOFP16-NEXT: mov v23.d[1], x10 +; CHECK-GI-NOFP16-NEXT: fcvtzu x10, s1 +; CHECK-GI-NOFP16-NEXT: fcvtzu x15, s24 +; CHECK-GI-NOFP16-NEXT: mov v16.d[1], x11 +; CHECK-GI-NOFP16-NEXT: fcvtzu x11, s20 +; CHECK-GI-NOFP16-NEXT: mov v17.d[1], x14 +; CHECK-GI-NOFP16-NEXT: fcvtzu x14, s6 +; CHECK-GI-NOFP16-NEXT: mov v2.d[1], x13 +; CHECK-GI-NOFP16-NEXT: fcvtzu x13, s5 +; CHECK-GI-NOFP16-NEXT: fmov d4, x9 +; CHECK-GI-NOFP16-NEXT: stp q0, q19, [x8] +; CHECK-GI-NOFP16-NEXT: fmov d0, x12 +; CHECK-GI-NOFP16-NEXT: stp q16, q23, [x8, #224] +; CHECK-GI-NOFP16-NEXT: fmov d1, x10 +; CHECK-GI-NOFP16-NEXT: mov v3.d[1], x15 +; CHECK-GI-NOFP16-NEXT: stp q2, q17, [x8, #160] +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], x11 +; CHECK-GI-NOFP16-NEXT: mov v4.d[1], x13 +; CHECK-GI-NOFP16-NEXT: mov v1.d[1], x14 +; CHECK-GI-NOFP16-NEXT: stp q0, q3, [x8, #96] +; CHECK-GI-NOFP16-NEXT: stp q4, 
q1, [x8, #32] +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v32f16_v32i64: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-GI-FP16-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-GI-FP16-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-GI-FP16-NEXT: mov h16, v3.h[2] +; CHECK-GI-FP16-NEXT: fcvtzu x9, h0 +; CHECK-GI-FP16-NEXT: mov h23, v3.h[3] +; CHECK-GI-FP16-NEXT: mov h25, v3.h[1] +; CHECK-GI-FP16-NEXT: fcvtzu x15, h3 +; CHECK-GI-FP16-NEXT: mov h24, v2.h[2] +; CHECK-GI-FP16-NEXT: mov h19, v1.h[2] +; CHECK-GI-FP16-NEXT: mov h21, v2.h[1] +; CHECK-GI-FP16-NEXT: mov h26, v2.h[3] +; CHECK-GI-FP16-NEXT: mov h17, v4.h[2] +; CHECK-GI-FP16-NEXT: mov h18, v5.h[2] +; CHECK-GI-FP16-NEXT: mov h22, v6.h[2] +; CHECK-GI-FP16-NEXT: fcvtzu x10, h5 +; CHECK-GI-FP16-NEXT: fcvtzu x12, h16 +; CHECK-GI-FP16-NEXT: fcvtzu x11, h6 +; CHECK-GI-FP16-NEXT: mov h7, v1.h[1] +; CHECK-GI-FP16-NEXT: mov h20, v1.h[3] +; CHECK-GI-FP16-NEXT: fcvtzu x13, h17 +; CHECK-GI-FP16-NEXT: fcvtzu x14, h18 +; CHECK-GI-FP16-NEXT: fmov d18, x9 +; CHECK-GI-FP16-NEXT: fcvtzu x9, h22 +; CHECK-GI-FP16-NEXT: fmov d3, x10 +; CHECK-GI-FP16-NEXT: fcvtzu x10, h23 +; CHECK-GI-FP16-NEXT: fmov d22, x12 +; CHECK-GI-FP16-NEXT: fcvtzu x12, h25 +; CHECK-GI-FP16-NEXT: fmov d23, x15 +; CHECK-GI-FP16-NEXT: fmov d16, x11 +; CHECK-GI-FP16-NEXT: fcvtzu x11, h2 +; CHECK-GI-FP16-NEXT: fcvtzu x15, h21 +; CHECK-GI-FP16-NEXT: fmov d2, x13 +; CHECK-GI-FP16-NEXT: fcvtzu x13, h24 +; CHECK-GI-FP16-NEXT: fmov d17, x14 +; CHECK-GI-FP16-NEXT: fcvtzu x14, h19 +; CHECK-GI-FP16-NEXT: mov v22.d[1], x10 +; CHECK-GI-FP16-NEXT: fcvtzu x10, h1 +; CHECK-GI-FP16-NEXT: mov v23.d[1], x12 +; CHECK-GI-FP16-NEXT: fmov d19, x9 +; CHECK-GI-FP16-NEXT: fcvtzu x9, h26 +; CHECK-GI-FP16-NEXT: fcvtzu x12, h20 +; CHECK-GI-FP16-NEXT: mov h20, v0.h[2] +; CHECK-GI-FP16-NEXT: fmov d21, x11 +; CHECK-GI-FP16-NEXT: fmov d1, x13 +; CHECK-GI-FP16-NEXT: fcvtzu x13, h7 +; CHECK-GI-FP16-NEXT: mov h24, v0.h[3] +; 
CHECK-GI-FP16-NEXT: fmov d7, x14 +; CHECK-GI-FP16-NEXT: stp q23, q22, [x8, #192] +; CHECK-GI-FP16-NEXT: fmov d22, x10 +; CHECK-GI-FP16-NEXT: mov v21.d[1], x15 +; CHECK-GI-FP16-NEXT: mov v1.d[1], x9 +; CHECK-GI-FP16-NEXT: mov h23, v0.h[1] +; CHECK-GI-FP16-NEXT: fcvtzu x9, h20 +; CHECK-GI-FP16-NEXT: mov v7.d[1], x12 +; CHECK-GI-FP16-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-GI-FP16-NEXT: mov h20, v6.h[3] +; CHECK-GI-FP16-NEXT: mov v22.d[1], x13 +; CHECK-GI-FP16-NEXT: mov h6, v6.h[1] +; CHECK-GI-FP16-NEXT: fcvtzu x10, h24 +; CHECK-GI-FP16-NEXT: stp q21, q1, [x8, #128] +; CHECK-GI-FP16-NEXT: mov h1, v5.h[1] +; CHECK-GI-FP16-NEXT: mov h5, v5.h[3] +; CHECK-GI-FP16-NEXT: fcvtzu x12, h20 +; CHECK-GI-FP16-NEXT: mov h20, v0.h[2] +; CHECK-GI-FP16-NEXT: fcvtzu x11, h0 +; CHECK-GI-FP16-NEXT: stp q22, q7, [x8, #64] +; CHECK-GI-FP16-NEXT: fmov d7, x9 +; CHECK-GI-FP16-NEXT: fcvtzu x9, h23 +; CHECK-GI-FP16-NEXT: mov h21, v4.h[3] +; CHECK-GI-FP16-NEXT: mov h22, v4.h[1] +; CHECK-GI-FP16-NEXT: fcvtzu x13, h6 +; CHECK-GI-FP16-NEXT: mov h6, v0.h[3] +; CHECK-GI-FP16-NEXT: fcvtzu x14, h5 +; CHECK-GI-FP16-NEXT: mov h0, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v7.d[1], x10 +; CHECK-GI-FP16-NEXT: fcvtzu x10, h1 +; CHECK-GI-FP16-NEXT: mov v19.d[1], x12 +; CHECK-GI-FP16-NEXT: mov v18.d[1], x9 +; CHECK-GI-FP16-NEXT: fcvtzu x9, h4 +; CHECK-GI-FP16-NEXT: fcvtzu x12, h20 +; CHECK-GI-FP16-NEXT: fcvtzu x15, h21 +; CHECK-GI-FP16-NEXT: mov v16.d[1], x13 +; CHECK-GI-FP16-NEXT: fcvtzu x13, h22 +; CHECK-GI-FP16-NEXT: mov v17.d[1], x14 +; CHECK-GI-FP16-NEXT: fcvtzu x14, h6 +; CHECK-GI-FP16-NEXT: fmov d4, x11 +; CHECK-GI-FP16-NEXT: mov v3.d[1], x10 +; CHECK-GI-FP16-NEXT: fcvtzu x10, h0 +; CHECK-GI-FP16-NEXT: stp q18, q7, [x8] +; CHECK-GI-FP16-NEXT: fmov d0, x9 +; CHECK-GI-FP16-NEXT: fmov d1, x12 +; CHECK-GI-FP16-NEXT: stp q16, q19, [x8, #224] +; CHECK-GI-FP16-NEXT: mov v2.d[1], x15 +; CHECK-GI-FP16-NEXT: stp q3, q17, [x8, #160] +; CHECK-GI-FP16-NEXT: mov v0.d[1], x13 +; CHECK-GI-FP16-NEXT: mov v1.d[1], x14 
+; CHECK-GI-FP16-NEXT: mov v4.d[1], x10 +; CHECK-GI-FP16-NEXT: stp q0, q2, [x8, #96] +; CHECK-GI-FP16-NEXT: stp q4, q1, [x8, #32] +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <32 x half> %a to <32 x i64> + ret <32 x i64> %c +} + +define <2 x i32> @fptos_v2f16_v2i32(<2 x half> %a) { +; CHECK-LABEL: fptos_v2f16_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x half> %a to <2 x i32> + ret <2 x i32> %c +} + +define <2 x i32> @fptou_v2f16_v2i32(<2 x half> %a) { +; CHECK-LABEL: fptou_v2f16_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x half> %a to <2 x i32> + ret <2 x i32> %c +} + +define <3 x i32> @fptos_v3f16_v3i32(<3 x half> %a) { +; CHECK-LABEL: fptos_v3f16_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x half> %a to <3 x i32> + ret <3 x i32> %c +} + +define <3 x i32> @fptou_v3f16_v3i32(<3 x half> %a) { +; CHECK-LABEL: fptou_v3f16_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x half> %a to <3 x i32> + ret <3 x i32> %c +} + +define <4 x i32> @fptos_v4f16_v4i32(<4 x half> %a) { +; CHECK-LABEL: fptos_v4f16_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <4 x half> %a to <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @fptou_v4f16_v4i32(<4 x half> %a) { +; CHECK-LABEL: fptou_v4f16_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <4 x half> %a to <4 x i32> + 
ret <4 x i32> %c +} + +define <8 x i32> @fptos_v8f16_v8i32(<8 x half> %a) { +; CHECK-LABEL: fptos_v8f16_v8i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <8 x half> %a to <8 x i32> + ret <8 x i32> %c +} + +define <8 x i32> @fptou_v8f16_v8i32(<8 x half> %a) { +; CHECK-LABEL: fptou_v8f16_v8i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <8 x half> %a to <8 x i32> + ret <8 x i32> %c +} + +define <16 x i32> @fptos_v16f16_v16i32(<16 x half> %a) { +; CHECK-LABEL: fptos_v16f16_v16i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-NEXT: fcvtl2 v4.4s, v1.8h +; CHECK-NEXT: fcvtl v5.4s, v1.4h +; CHECK-NEXT: fcvtzs v0.4s, v2.4s +; CHECK-NEXT: fcvtzs v1.4s, v3.4s +; CHECK-NEXT: fcvtzs v3.4s, v4.4s +; CHECK-NEXT: fcvtzs v2.4s, v5.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <16 x half> %a to <16 x i32> + ret <16 x i32> %c +} + +define <16 x i32> @fptou_v16f16_v16i32(<16 x half> %a) { +; CHECK-LABEL: fptou_v16f16_v16i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-NEXT: fcvtl2 v4.4s, v1.8h +; CHECK-NEXT: fcvtl v5.4s, v1.4h +; CHECK-NEXT: fcvtzu v0.4s, v2.4s +; CHECK-NEXT: fcvtzu v1.4s, v3.4s +; CHECK-NEXT: fcvtzu v3.4s, v4.4s +; CHECK-NEXT: fcvtzu v2.4s, v5.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <16 x half> %a to <16 x i32> + ret <16 x i32> %c +} + +define <32 x i32> @fptos_v32f16_v32i32(<32 x half> %a) { +; CHECK-LABEL: fptos_v32f16_v32i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl2 v4.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtl2 v5.4s, v1.8h +; CHECK-NEXT: fcvtl v6.4s, v1.4h +; CHECK-NEXT: 
fcvtl v7.4s, v2.4h +; CHECK-NEXT: fcvtl2 v16.4s, v2.8h +; CHECK-NEXT: fcvtl2 v17.4s, v3.8h +; CHECK-NEXT: fcvtl v18.4s, v3.4h +; CHECK-NEXT: fcvtzs v1.4s, v4.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v3.4s, v5.4s +; CHECK-NEXT: fcvtzs v2.4s, v6.4s +; CHECK-NEXT: fcvtzs v4.4s, v7.4s +; CHECK-NEXT: fcvtzs v5.4s, v16.4s +; CHECK-NEXT: fcvtzs v7.4s, v17.4s +; CHECK-NEXT: fcvtzs v6.4s, v18.4s +; CHECK-NEXT: ret +entry: + %c = fptosi <32 x half> %a to <32 x i32> + ret <32 x i32> %c +} + +define <32 x i32> @fptou_v32f16_v32i32(<32 x half> %a) { +; CHECK-LABEL: fptou_v32f16_v32i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl2 v4.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtl2 v5.4s, v1.8h +; CHECK-NEXT: fcvtl v6.4s, v1.4h +; CHECK-NEXT: fcvtl v7.4s, v2.4h +; CHECK-NEXT: fcvtl2 v16.4s, v2.8h +; CHECK-NEXT: fcvtl2 v17.4s, v3.8h +; CHECK-NEXT: fcvtl v18.4s, v3.4h +; CHECK-NEXT: fcvtzu v1.4s, v4.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v3.4s, v5.4s +; CHECK-NEXT: fcvtzu v2.4s, v6.4s +; CHECK-NEXT: fcvtzu v4.4s, v7.4s +; CHECK-NEXT: fcvtzu v5.4s, v16.4s +; CHECK-NEXT: fcvtzu v7.4s, v17.4s +; CHECK-NEXT: fcvtzu v6.4s, v18.4s +; CHECK-NEXT: ret +entry: + %c = fptoui <32 x half> %a to <32 x i32> + ret <32 x i32> %c +} + +define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) { +; CHECK-LABEL: fptos_v2f16_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x half> %a to <2 x i16> + ret <2 x i16> %c +} + +define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) { +; CHECK-LABEL: fptou_v2f16_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x half> %a to <2 x i16> + ret <2 x i16> %c +} + +define <3 x i16> @fptos_v3f16_v3i16(<3 x 
half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v3f16_v3i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v3f16_v3i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v3f16_v3i16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <3 x half> %a to <3 x i16> + ret <3 x i16> %c +} + +define <3 x i16> @fptou_v3f16_v3i16(<3 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v3f16_v3i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v3f16_v3i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v3f16_v3i16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <3 x half> %a to <3 x i16> + ret <3 x i16> %c +} + +define <4 x i16> @fptos_v4f16_v4i16(<4 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v4f16_v4i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; 
CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v4f16_v4i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v4f16_v4i16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v4f16_v4i16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <4 x half> %a to <4 x i16> + ret <4 x i16> %c +} + +define <4 x i16> @fptou_v4f16_v4i16(<4 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v4f16_v4i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v4f16_v4i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v4f16_v4i16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v4f16_v4i16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <4 x half> %a to <4 x i16> + ret <4 x i16> %c +} + +define <8 x i16> @fptos_v8f16_v8i16(<8 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v8f16_v8i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; 
CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v8f16_v8i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v8f16_v8i16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v8f16_v8i16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <8 x half> %a to <8 x i16> + ret <8 x i16> %c +} + +define <8 x i16> @fptou_v8f16_v8i16(<8 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v8f16_v8i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v8f16_v8i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v8f16_v8i16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v8f16_v8i16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <8 x half> %a to <8 x i16> + ret <8 x i16> %c +} + +define <16 x i16> @fptos_v16f16_v16i16(<16 x half> %a) { +; 
CHECK-SD-NOFP16-LABEL: fptos_v16f16_v16i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v1.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v3.8h +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v16f16_v16i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: fcvtzs v1.8h, v1.8h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v16f16_v16i16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v3.8h +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v16f16_v16i16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: fcvtzs v1.8h, v1.8h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <16 x half> %a to <16 x i16> + ret <16 x i16> %c +} + +define <16 x i16> @fptou_v16f16_v16i16(<16 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v16f16_v16i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v1.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-SD-NOFP16-NEXT: fcvtzu v2.4s, v2.4s +; 
CHECK-SD-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v3.8h +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v16f16_v16i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: fcvtzu v1.8h, v1.8h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v16f16_v16i16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v3.8h +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v16f16_v16i16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: fcvtzu v1.8h, v1.8h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <16 x half> %a to <16 x i16> + ret <16 x i16> %c +} + +define <32 x i16> @fptos_v32f16_v32i16(<32 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v32f16_v32i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v4.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v5.4s, v1.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v6.4s, v2.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v7.4s, v3.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v5.4s, v5.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; 
CHECK-SD-NOFP16-NEXT: fcvtzs v6.4s, v6.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v7.4s, v7.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v4.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v5.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v2.8h, v2.8h, v6.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v3.8h, v3.8h, v7.8h +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v32f16_v32i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: fcvtzs v1.8h, v1.8h +; CHECK-SD-FP16-NEXT: fcvtzs v2.8h, v2.8h +; CHECK-SD-FP16-NEXT: fcvtzs v3.8h, v3.8h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v32f16_v32i16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v6.4s, v2.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v7.4s, v3.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v5.4s, v5.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v6.4s, v6.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v7.4s, v7.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v4.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v5.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v2.8h, v2.8h, v6.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v3.8h, v3.8h, v7.8h +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v32f16_v32i16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: fcvtzs v1.8h, v1.8h +; CHECK-GI-FP16-NEXT: fcvtzs v2.8h, v2.8h +; CHECK-GI-FP16-NEXT: fcvtzs v3.8h, v3.8h +; 
CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <32 x half> %a to <32 x i16> + ret <32 x i16> %c +} + +define <32 x i16> @fptou_v32f16_v32i16(<32 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v32f16_v32i16: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v4.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v5.4s, v1.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v6.4s, v2.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v7.4s, v3.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-SD-NOFP16-NEXT: fcvtzu v4.4s, v4.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v5.4s, v5.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v6.4s, v6.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v7.4s, v7.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v4.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v5.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v2.8h, v2.8h, v6.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v3.8h, v3.8h, v7.8h +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v32f16_v32i16: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: fcvtzu v1.8h, v1.8h +; CHECK-SD-FP16-NEXT: fcvtzu v2.8h, v2.8h +; CHECK-SD-FP16-NEXT: fcvtzu v3.8h, v3.8h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v32f16_v32i16: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v6.4s, v2.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v7.4s, v3.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtzu v4.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu 
v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v5.4s, v5.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v6.4s, v6.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v7.4s, v7.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v4.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v5.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v2.8h, v2.8h, v6.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v3.8h, v3.8h, v7.8h +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v32f16_v32i16: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: fcvtzu v1.8h, v1.8h +; CHECK-GI-FP16-NEXT: fcvtzu v2.8h, v2.8h +; CHECK-GI-FP16-NEXT: fcvtzu v3.8h, v3.8h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <32 x half> %a to <32 x i16> + ret <32 x i16> %c +} + +define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) { +; CHECK-LABEL: fptos_v2f16_v2i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x half> %a to <2 x i8> + ret <2 x i8> %c +} + +define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) { +; CHECK-LABEL: fptou_v2f16_v2i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x half> %a to <2 x i8> + ret <2 x i8> %c +} + +define <3 x i8> @fptos_v3f16_v3i8(<3 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v3f16_v3i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: umov w0, v0.h[0] +; CHECK-SD-NOFP16-NEXT: umov w1, v0.h[1] +; CHECK-SD-NOFP16-NEXT: umov w2, v0.h[2] +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v3f16_v3i8: 
+; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-SD-FP16-NEXT: umov w0, v0.h[0] +; CHECK-SD-FP16-NEXT: umov w1, v0.h[1] +; CHECK-SD-FP16-NEXT: umov w2, v0.h[2] +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v3f16_v3i8: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: umov w0, v0.h[0] +; CHECK-GI-NOFP16-NEXT: umov w1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: umov w2, v0.h[2] +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i8: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: umov w0, v0.h[0] +; CHECK-GI-FP16-NEXT: umov w1, v0.h[1] +; CHECK-GI-FP16-NEXT: umov w2, v0.h[2] +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <3 x half> %a to <3 x i8> + ret <3 x i8> %c +} + +define <3 x i8> @fptou_v3f16_v3i8(<3 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v3f16_v3i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: umov w0, v0.h[0] +; CHECK-SD-NOFP16-NEXT: umov w1, v0.h[1] +; CHECK-SD-NOFP16-NEXT: umov w2, v0.h[2] +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v3f16_v3i8: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-SD-FP16-NEXT: umov w0, v0.h[0] +; CHECK-SD-FP16-NEXT: umov w1, v0.h[1] +; CHECK-SD-FP16-NEXT: umov w2, v0.h[2] +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v3f16_v3i8: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: umov w0, v0.h[0] +; CHECK-GI-NOFP16-NEXT: umov w1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: umov w2, v0.h[2] +; 
CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i8: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: umov w0, v0.h[0] +; CHECK-GI-FP16-NEXT: umov w1, v0.h[1] +; CHECK-GI-FP16-NEXT: umov w2, v0.h[2] +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <3 x half> %a to <3 x i8> + ret <3 x i8> %c +} + +define <4 x i8> @fptos_v4f16_v4i8(<4 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v4f16_v4i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v4f16_v4i8: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v4f16_v4i8: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v4f16_v4i8: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <4 x half> %a to <4 x i8> + ret <4 x i8> %c +} + +define <4 x i8> @fptou_v4f16_v4i8(<4 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v4f16_v4i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v4f16_v4i8: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v4f16_v4i8: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: ret +; +; 
CHECK-GI-FP16-LABEL: fptou_v4f16_v4i8: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <4 x half> %a to <4 x i8> + ret <4 x i8> %c +} + +define <8 x i8> @fptos_v8f16_v8i8(<8 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v8f16_v8i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-SD-NOFP16-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v8f16_v8i8: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v8f16_v8i8: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-GI-NOFP16-NEXT: xtn v0.8b, v0.8h +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v8f16_v8i8: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: xtn v0.8b, v0.8h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <8 x half> %a to <8 x i8> + ret <8 x i8> %c +} + +define <8 x i8> @fptou_v8f16_v8i8(<8 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v8f16_v8i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-SD-NOFP16-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v8f16_v8i8: +; CHECK-SD-FP16: // 
%bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v8f16_v8i8: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-GI-NOFP16-NEXT: xtn v0.8b, v0.8h +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v8f16_v8i8: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: xtn v0.8b, v0.8h +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <8 x half> %a to <8 x i8> + ret <8 x i8> %c +} + +define <16 x i8> @fptos_v16f16_v16i8(<16 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v16f16_v16i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v2.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v3.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v16f16_v16i8: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v1.8h, v1.8h +; CHECK-SD-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v16f16_v16i8: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; 
CHECK-GI-NOFP16-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v2.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v3.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v16f16_v16i8: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v1.8h, v1.8h +; CHECK-GI-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <16 x half> %a to <16 x i8> + ret <16 x i8> %c +} + +define <16 x i8> @fptou_v16f16_v16i8(<16 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v16f16_v16i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v2.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v3.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v16f16_v16i8: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzu v1.8h, v1.8h +; CHECK-SD-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v16f16_v16i8: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: 
fcvtzu v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v2.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v3.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v16f16_v16i8: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzu v1.8h, v1.8h +; CHECK-GI-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <16 x half> %a to <16 x i8> + ret <16 x i8> %c +} + +define <32 x i8> @fptos_v32f16_v32i8(<32 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptos_v32f16_v32i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v4.4s, v1.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v5.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v6.4s, v3.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v7.4s, v2.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-SD-NOFP16-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v5.4s, v5.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v6.4s, v6.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v7.4s, v7.4s +; CHECK-SD-NOFP16-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v4.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v5.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v3.8h, v3.8h, v6.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v2.8h, v2.8h, v7.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-SD-NOFP16-NEXT: uzp1 v1.16b, v2.16b, v3.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptos_v32f16_v32i8: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzs v1.8h, v1.8h +; CHECK-SD-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: fcvtzs v3.8h, v3.8h +; CHECK-SD-FP16-NEXT: 
fcvtzs v2.8h, v2.8h +; CHECK-SD-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-SD-FP16-NEXT: uzp1 v1.16b, v2.16b, v3.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v32f16_v32i8: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v6.4s, v3.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v7.4s, v2.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v5.4s, v5.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v6.4s, v6.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v7.4s, v7.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v4.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v5.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v3.8h, v3.8h, v6.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v2.8h, v2.8h, v7.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: uzp1 v1.16b, v2.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v32f16_v32i8: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs v1.8h, v1.8h +; CHECK-GI-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: fcvtzs v3.8h, v3.8h +; CHECK-GI-FP16-NEXT: fcvtzs v2.8h, v2.8h +; CHECK-GI-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-GI-FP16-NEXT: uzp1 v1.16b, v2.16b, v3.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <32 x half> %a to <32 x i8> + ret <32 x i8> %c +} + +define <32 x i8> @fptou_v32f16_v32i8(<32 x half> %a) { +; CHECK-SD-NOFP16-LABEL: fptou_v32f16_v32i8: +; CHECK-SD-NOFP16: // %bb.0: // %entry +; CHECK-SD-NOFP16-NEXT: fcvtl2 v4.4s, v1.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; 
CHECK-SD-NOFP16-NEXT: fcvtl2 v5.4s, v0.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v6.4s, v3.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-SD-NOFP16-NEXT: fcvtl2 v7.4s, v2.8h +; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-SD-NOFP16-NEXT: fcvtzu v4.4s, v4.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v5.4s, v5.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v6.4s, v6.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v7.4s, v7.4s +; CHECK-SD-NOFP16-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v4.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v5.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v3.8h, v3.8h, v6.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v2.8h, v2.8h, v7.8h +; CHECK-SD-NOFP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-SD-NOFP16-NEXT: uzp1 v1.16b, v2.16b, v3.16b +; CHECK-SD-NOFP16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fptou_v32f16_v32i8: +; CHECK-SD-FP16: // %bb.0: // %entry +; CHECK-SD-FP16-NEXT: fcvtzu v1.8h, v1.8h +; CHECK-SD-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-SD-FP16-NEXT: fcvtzu v3.8h, v3.8h +; CHECK-SD-FP16-NEXT: fcvtzu v2.8h, v2.8h +; CHECK-SD-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-SD-FP16-NEXT: uzp1 v1.16b, v2.16b, v3.16b +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v32f16_v32i8: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v6.4s, v3.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v7.4s, v2.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h +; CHECK-GI-NOFP16-NEXT: fcvtzu v4.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v5.4s, v5.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: 
fcvtzu v6.4s, v6.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v7.4s, v7.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v4.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v5.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v3.8h, v3.8h, v6.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v2.8h, v2.8h, v7.8h +; CHECK-GI-NOFP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: uzp1 v1.16b, v2.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v32f16_v32i8: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzu v1.8h, v1.8h +; CHECK-GI-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: fcvtzu v3.8h, v3.8h +; CHECK-GI-FP16-NEXT: fcvtzu v2.8h, v2.8h +; CHECK-GI-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-GI-FP16-NEXT: uzp1 v1.16b, v2.16b, v3.16b +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <32 x half> %a to <32 x i8> + ret <32 x i8> %c +} From 4266815f4d82bd7571bf6ae85eb15fcc0b3ae37e Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 17 Oct 2023 18:41:23 +0100 Subject: [PATCH 363/720] [AArch64] Convert negative constant aarch64_neon_sshl to VASHR (#68918) In replacing shifts by splat with constant shifts, we can handle negative shifts by flipping the sign and using a VASHR or VLSHR. 
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 11 ++++++++--- llvm/test/CodeGen/AArch64/arm64-vshift.ll | 13 ++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 64d00dafd835b..a16a102e472e7 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -19100,9 +19100,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { case Intrinsic::aarch64_neon_sshl: case Intrinsic::aarch64_neon_ushl: // For positive shift amounts we can use SHL, as ushl/sshl perform a regular - // left shift for positive shift amounts. Below, we only replace the current - // node with VSHL, if this condition is met. - Opcode = AArch64ISD::VSHL; + // left shift for positive shift amounts. For negative shifts we can use a + // VASHR/VLSHR as appropiate. + if (ShiftAmount < 0) { + Opcode = IID == Intrinsic::aarch64_neon_sshl ? 
AArch64ISD::VASHR + : AArch64ISD::VLSHR; + ShiftAmount = -ShiftAmount; + } else + Opcode = AArch64ISD::VSHL; IsRightShift = false; break; } diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll index 367c3be242a17..1dfd977186b0e 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -2130,9 +2130,8 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind { ; CHECK-LABEL: neon.ushll4s_neg_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: movi.2d v1, #0xffffffffffffffff ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushl.4s v0, v0, v1 +; CHECK-NEXT: ushr.4s v0, v0, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> @@ -2250,9 +2249,8 @@ define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind { define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind { ; CHECK-LABEL: neon.sshl16b_neg_constant_shift: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v1, #254 ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sshl.16b v0, v0, v1 +; CHECK-NEXT: sshr.16b v0, v0, #2 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) @@ -2300,9 +2298,8 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind { ; CHECK-LABEL: neon.sshll4s_neg_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: movi.2d v1, #0xffffffffffffffff ; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: sshl.4s v0, v0, v1 +; CHECK-NEXT: sshr.4s v0, v0, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> @@ -2377,10 +2374,8 @@ define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind { ; CHECK-LABEL: neon.sshll_scalar_constant_shift_m1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff -; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: 
fmov d0, x8 -; CHECK-NEXT: sshl d0, d0, d1 +; CHECK-NEXT: sshr d0, d0, #1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i32, ptr %A From 66775f8ccdcc8264ef349518e1c59d96d4227823 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 16 Oct 2023 12:50:29 -0700 Subject: [PATCH 364/720] [SLP]Fix PR69196: Instruction does not dominate all uses During emission of the postponed gathers, need to insert them before user instruction to avoid use before definition crash. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 113 +++++++++++------- .../non-scheduled-inst-reused-as-last-inst.ll | 45 +++++++ 2 files changed, 112 insertions(+), 46 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 6a9bdc26bc88f..32ddd82d9adbd 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2477,11 +2477,15 @@ class BoUpSLP { bool ResizeAllowed = false) const; /// Vectorize a single entry in the tree. - Value *vectorizeTree(TreeEntry *E); + /// \param PostponedPHIs true, if need to postpone emission of phi nodes to + /// avoid issues with def-use order. + Value *vectorizeTree(TreeEntry *E, bool PostponedPHIs); /// Vectorize a single entry in the tree, the \p Idx-th operand of the entry /// \p E. - Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx); + /// \param PostponedPHIs true, if need to postpone emission of phi nodes to + /// avoid issues with def-use order. + Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx, bool PostponedPHIs); /// Create a new vector from a list of scalar values. Produces a sequence /// which exploits values reused across lanes, and arranges the inserts @@ -2644,6 +2648,9 @@ class BoUpSLP { /// The Scalars are vectorized into this value. It is initialized to Null. 
WeakTrackingVH VectorizedValue = nullptr; + /// New vector phi instructions emitted for the vectorized phi nodes. + PHINode *PHI = nullptr; + /// Do we need to gather this sequence or vectorize it /// (either with vector instruction or with scatter/gather /// intrinsics for store/load)? @@ -9991,7 +9998,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { } }; -Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) { +Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx, + bool PostponedPHIs) { ValueList &VL = E->getOperand(NodeIdx); if (E->State == TreeEntry::PossibleStridedVectorize && !E->ReorderIndices.empty()) { @@ -10040,7 +10048,7 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) { ShuffleBuilder.add(V, Mask); return ShuffleBuilder.finalize(std::nullopt); }; - Value *V = vectorizeTree(VE); + Value *V = vectorizeTree(VE, PostponedPHIs); if (VF != cast(V->getType())->getNumElements()) { if (!VE->ReuseShuffleIndices.empty()) { // Reshuffle to get only unique values. 
@@ -10113,14 +10121,7 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) { assert(I->get()->UserTreeIndices.size() == 1 && "Expected only single user for the gather node."); assert(I->get()->isSame(VL) && "Expected same list of scalars."); - IRBuilder<>::InsertPointGuard Guard(Builder); - if (E->getOpcode() != Instruction::InsertElement && - E->getOpcode() != Instruction::PHI) { - Instruction *LastInst = &getLastInstructionInBundle(E); - assert(LastInst && "Failed to find last instruction in bundle"); - Builder.SetInsertPoint(LastInst->getParent(), LastInst->getIterator()); - } - return vectorizeTree(I->get()); + return vectorizeTree(I->get(), PostponedPHIs); } template @@ -10480,10 +10481,12 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) { *this); } -Value *BoUpSLP::vectorizeTree(TreeEntry *E) { +Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { IRBuilder<>::InsertPointGuard Guard(Builder); - if (E->VectorizedValue) { + if (E->VectorizedValue && + (E->State != TreeEntry::Vectorize || E->getOpcode() != Instruction::PHI || + E->isAltShuffle())) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n"); return E->VectorizedValue; } @@ -10530,21 +10533,32 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E != VectorizableTree.front().get() || !E->UserTreeIndices.empty()) && "PHI reordering is free."); + if (PostponedPHIs && E->VectorizedValue) + return E->VectorizedValue; auto *PH = cast(VL0); Builder.SetInsertPoint(PH->getParent(), PH->getParent()->getFirstNonPHIIt()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); - PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); - Value *V = NewPhi; - - // Adjust insertion point once all PHI's have been generated. 
- Builder.SetInsertPoint(PH->getParent(), - PH->getParent()->getFirstInsertionPt()); - Builder.SetCurrentDebugLocation(PH->getDebugLoc()); + if (PostponedPHIs || !E->VectorizedValue) { + PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); + E->PHI = NewPhi; + Value *V = NewPhi; + + // Adjust insertion point once all PHI's have been generated. + Builder.SetInsertPoint(PH->getParent(), + PH->getParent()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(PH->getDebugLoc()); - V = FinalShuffle(V, E); + V = FinalShuffle(V, E); - E->VectorizedValue = V; + E->VectorizedValue = V; + if (PostponedPHIs) + return V; + } + PHINode *NewPhi = cast(E->PHI); + // If phi node is fully emitted - exit. + if (NewPhi->getNumIncomingValues() != 0) + return NewPhi; // PHINodes may have multiple entries from the same block. We want to // visit every block once. @@ -10557,7 +10571,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // Stop emission if all incoming values are generated. 
if (NewPhi->getNumIncomingValues() == PH->getNumIncomingValues()) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); - return V; + return NewPhi; } if (!VisitedBBs.insert(IBB).second) { @@ -10567,13 +10581,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Builder.SetInsertPoint(IBB->getTerminator()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); - Value *Vec = vectorizeOperand(E, i); + Value *Vec = vectorizeOperand(E, i, /*PostponedPHIs=*/true); NewPhi->addIncoming(Vec, IBB); } assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() && "Invalid number of incoming values"); - return V; + return NewPhi; } case Instruction::ExtractElement: { @@ -10596,7 +10610,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::InsertElement: { assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique"); Builder.SetInsertPoint(cast(E->Scalars.back())); - Value *V = vectorizeOperand(E, 1); + Value *V = vectorizeOperand(E, 1, PostponedPHIs); // Create InsertVector shuffle if necessary auto *FirstInsert = cast(*find_if(E->Scalars, [E](Value *V) { @@ -10754,7 +10768,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::BitCast: { setInsertPointAfterBundle(E); - Value *InVec = vectorizeOperand(E, 0); + Value *InVec = vectorizeOperand(E, 0, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; @@ -10772,12 +10786,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::ICmp: { setInsertPointAfterBundle(E); - Value *L = vectorizeOperand(E, 0); + Value *L = vectorizeOperand(E, 0, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } - Value *R = vectorizeOperand(E, 1); + Value *R = vectorizeOperand(E, 1, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return 
E->VectorizedValue; @@ -10795,17 +10809,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Select: { setInsertPointAfterBundle(E); - Value *Cond = vectorizeOperand(E, 0); + Value *Cond = vectorizeOperand(E, 0, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } - Value *True = vectorizeOperand(E, 1); + Value *True = vectorizeOperand(E, 1, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } - Value *False = vectorizeOperand(E, 2); + Value *False = vectorizeOperand(E, 2, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; @@ -10821,7 +10835,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::FNeg: { setInsertPointAfterBundle(E); - Value *Op = vectorizeOperand(E, 0); + Value *Op = vectorizeOperand(E, 0, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -10861,12 +10875,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Xor: { setInsertPointAfterBundle(E); - Value *LHS = vectorizeOperand(E, 0); + Value *LHS = vectorizeOperand(E, 0, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } - Value *RHS = vectorizeOperand(E, 1); + Value *RHS = vectorizeOperand(E, 1, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; @@ -10911,7 +10925,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { assert((E->State == TreeEntry::ScatterVectorize || E->State == TreeEntry::PossibleStridedVectorize) && "Unhandled state"); - Value *VecPtr = vectorizeOperand(E, 0); + Value *VecPtr = vectorizeOperand(E, 0, PostponedPHIs); if (E->VectorizedValue) { 
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; @@ -10935,7 +10949,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E); - Value *VecValue = vectorizeOperand(E, 0); + Value *VecValue = vectorizeOperand(E, 0, PostponedPHIs); VecValue = FinalShuffle(VecValue, E); Value *Ptr = SI->getPointerOperand(); @@ -10963,7 +10977,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { auto *GEP0 = cast(VL0); setInsertPointAfterBundle(E); - Value *Op0 = vectorizeOperand(E, 0); + Value *Op0 = vectorizeOperand(E, 0, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; @@ -10971,7 +10985,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { SmallVector OpVecs; for (int J = 1, N = GEP0->getNumOperands(); J < N; ++J) { - Value *OpVec = vectorizeOperand(E, J); + Value *OpVec = vectorizeOperand(E, J, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; @@ -11030,7 +11044,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { continue; } - Value *OpVec = vectorizeOperand(E, j); + Value *OpVec = vectorizeOperand(E, j, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; @@ -11087,15 +11101,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *LHS = nullptr, *RHS = nullptr; if (Instruction::isBinaryOp(E->getOpcode()) || isa(VL0)) { setInsertPointAfterBundle(E); - LHS = vectorizeOperand(E, 0); + LHS = vectorizeOperand(E, 0, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } - RHS = vectorizeOperand(E, 1); + RHS = vectorizeOperand(E, 1, PostponedPHIs); } else { setInsertPointAfterBundle(E); - LHS = vectorizeOperand(E, 0); + LHS = vectorizeOperand(E, 0, PostponedPHIs); } if (E->VectorizedValue) { 
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -11197,7 +11211,14 @@ Value *BoUpSLP::vectorizeTree( else Builder.SetInsertPoint(&F->getEntryBlock(), F->getEntryBlock().begin()); - auto *VectorRoot = vectorizeTree(VectorizableTree[0].get()); + // Postpone emission of PHIs operands to avoid cyclic dependencies issues. + auto *VectorRoot = + vectorizeTree(VectorizableTree[0].get(), /*PostponedPHIs=*/true); + for (const std::unique_ptr &TE : VectorizableTree) + if (TE->State == TreeEntry::Vectorize && + TE->getOpcode() == Instruction::PHI && !TE->isAltShuffle() && + TE->VectorizedValue) + (void)vectorizeTree(TE.get(), /*PostponedPHIs=*/false); // Run through the list of postponed gathers and emit them, replacing the temp // emitted allocas with actual vector instructions. ArrayRef PostponedNodes = PostponedGathers.getArrayRef(); @@ -11216,7 +11237,7 @@ Value *BoUpSLP::vectorizeTree( cast(TE->UserTreeIndices.front().UserTE->VectorizedValue); Builder.SetInsertPoint(PrevVec); Builder.SetCurrentDebugLocation(UserI->getDebugLoc()); - Value *Vec = vectorizeTree(TE); + Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false); PrevVec->replaceAllUsesWith(Vec); PostponedValues.try_emplace(Vec).first->second.push_back(TE); // Replace the stub vector node, if it was used before for one of the diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll new file mode 100644 index 0000000000000..3a9eca2bf2e6b --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define void @foo() { +; CHECK-LABEL: define void @foo() { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = 
insertelement <2 x i32> , i32 0, i32 0 +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 +; CHECK-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]]) +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb4: +; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]] +; CHECK: bb5: +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ] +; CHECK-NEXT: ret void +; +bb: + br label %bb1 + +bb1: + %phi = phi i32 [ 0, %bb ], [ %or, %bb4 ] + %phi2 = phi i32 [ 0, %bb ], [ %or3, %bb4 ] + %and = and i32 0, 0 + %shl = shl i32 %phi, %and + %or = or i32 %shl, 0 + %call = call i64 null(i32 %or) + %or3 = or i32 %phi2, 0 + br label %bb4 + +bb4: + br i1 false, label %bb5, label %bb1 + +bb5: + %phi6 = phi i32 [ %shl, %bb4 ] + %phi7 = phi i32 [ %or3, %bb4 ] + ret void +} From a22a1fe151b8198ddc5cd4963f1e3f8e23b57114 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 17 Oct 2023 10:53:22 -0700 Subject: [PATCH 365/720] [AMDGPU] support 64-bit immediates in SIInstrInfo::FoldImmediate (#69260) This is a part of https://github.com/llvm/llvm-project/issues/67781. Until we select more 64-bit move immediates the impact is minimal. 
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 53 ++-- llvm/lib/Target/AMDGPU/SIInstructions.td | 1 + .../ipra-return-address-save-restore.ll | 8 +- .../test/CodeGen/AMDGPU/peephole-fold-imm.mir | 227 +++++++++++++++++- .../AMDGPU/promote-constOffset-to-imm.ll | 6 +- 5 files changed, 270 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 51397cbb79146..2ad07550c7639 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3203,11 +3203,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, switch (DefMI.getOpcode()) { default: return false; + case AMDGPU::V_MOV_B64_e32: case AMDGPU::S_MOV_B64: - // TODO: We could fold 64-bit immediates, but this get complicated - // when there are sub-registers. - return false; - + case AMDGPU::V_MOV_B64_PSEUDO: + case AMDGPU::S_MOV_B64_IMM_PSEUDO: case AMDGPU::V_MOV_B32_e32: case AMDGPU::S_MOV_B32: case AMDGPU::V_ACCVGPR_WRITE_B32_e64: @@ -3220,19 +3219,45 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (!ImmOp->isImm()) return false; + auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t { + int64_t Imm = ImmOp->getImm(); + switch (UseOp.getSubReg()) { + default: + return Imm; + case AMDGPU::sub0: + return Lo_32(Imm); + case AMDGPU::sub1: + return Hi_32(Imm); + case AMDGPU::lo16: + return APInt(16, Imm).getSExtValue(); + case AMDGPU::hi16: + return APInt(32, Imm).ashr(16).getSExtValue(); + case AMDGPU::sub1_lo16: + return APInt(16, Hi_32(Imm)).getSExtValue(); + case AMDGPU::sub1_hi16: + return APInt(32, Hi_32(Imm)).ashr(16).getSExtValue(); + } + }; + + assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form"); + unsigned Opc = UseMI.getOpcode(); if (Opc == AMDGPU::COPY) { + assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form"); + Register DstReg = UseMI.getOperand(0).getReg(); - bool Is16Bit = getOpSize(UseMI, 0) == 2; + 
unsigned OpSize = getOpSize(UseMI, 0); + bool Is16Bit = OpSize == 2; + bool Is64Bit = OpSize == 8; bool isVGPRCopy = RI.isVGPR(*MRI, DstReg); - unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; - APInt Imm(32, ImmOp->getImm()); - - if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16) - Imm = Imm.ashr(16); + unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO + : AMDGPU::V_MOV_B32_e32 + : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO + : AMDGPU::S_MOV_B32; + APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1))); if (RI.isAGPR(*MRI, DstReg)) { - if (!isInlineConstant(Imm)) + if (Is64Bit || !isInlineConstant(Imm)) return false; NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64; } @@ -3317,7 +3342,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (NewOpc == AMDGPU::V_FMAMK_F16_t16) return false; - const int64_t Imm = ImmOp->getImm(); + const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. @@ -3401,8 +3426,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (NewOpc == AMDGPU::V_FMAAK_F16_t16) return false; - const int64_t Imm = ImmOp->getImm(); - // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. @@ -3413,7 +3436,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); // ChangingToImmediate adds Src2 back to the instruction. - Src2->ChangeToImmediate(Imm); + Src2->ChangeToImmediate(getImmFor(*Src2)); // These come before src2. 
removeModOperands(UseMI); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 9fdd6f04d2a0f..567f1b812c180 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -151,6 +151,7 @@ def S_MOV_B64_IMM_PSEUDO : SPseudoInstSI <(outs SReg_64:$sdst), let SchedRW = [WriteSALU, Write64Bit]; let Size = 16; // Needs maximum 2 s_mov_b32 instructions 8 byte long each. let Uses = []; + let UseNamedOperandTable = 1; } // Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy after the diff --git a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll index ef3c95b17598f..741164bc04506 100644 --- a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll @@ -28,12 +28,10 @@ declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1 ; Function Attrs: norecurse define internal fastcc void @svm_node_closure_bsdf(ptr addrspace(1) %sd, ptr %stack, <4 x i32> %node, ptr %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, ptr addrspace(1) %arrayidx.i.i2202, ptr addrspace(1) %retval.0.i.i22089, ptr addrspace(1) %retval.1.i221310, i1 %cmp575, ptr addrspace(1) %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) #2 { ; GCN-LABEL: {{^}}svm_node_closure_bsdf: -; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30, -; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, -; GCN: s_movk_i32 s30, 0x60 +; GCN-NOT: v_writelane_b32 +; GCN: s_movk_i32 s28, 0x60 ; GCN-NOT: s31 -; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], -; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], +; GCN-NOT: v_readlane_b32 ; GCN: s_waitcnt vmcnt(0) ; GCN: s_setpc_b64 s[30:31] entry: diff --git 
a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir index 4a77c03a8facd..ade192bde4dca 100644 --- a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s --- name: fold_simm_virtual @@ -119,3 +119,228 @@ body: | SI_RETURN_TO_EPILOG $vgpr0_lo16 ... + +--- +name: fold_sreg_64_sub0_to_vgpr_32 +body: | + bb.0: + + ; GCN-LABEL: name: fold_sreg_64_sub0_to_vgpr_32 + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1412567312, implicit $exec + ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B32_e32_]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200 + %1:vgpr_32 = COPY killed %0.sub0 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_sreg_64_sub1_to_vgpr_32 +body: | + bb.0: + + ; GCN-LABEL: name: fold_sreg_64_sub1_to_vgpr_32 + ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1311768467750121200 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 305419896, implicit $exec + ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B32_e32_]] + %0:sreg_64 = S_MOV_B64 1311768467750121200 + %1:vgpr_32 = COPY killed %0.sub1 + SI_RETURN_TO_EPILOG %1 + +... 
+ +--- +name: fold_vreg_64_sub1_to_vgpr_32 +body: | + bb.0: + + ; GCN-LABEL: name: fold_vreg_64_sub1_to_vgpr_32 + ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 305419896, implicit $exec + ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B32_e32_]] + %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec + %1:vgpr_32 = COPY killed %0.sub1 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_sreg_64_to_vreg_64 +body: | + bb.0: + + ; GCN-LABEL: name: fold_sreg_64_to_vreg_64 + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200 + ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec + ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200 + %1:vreg_64_align2 = COPY killed %0 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_sreg_64_to_sreg_64 +body: | + bb.0: + + ; GCN-LABEL: name: fold_sreg_64_to_sreg_64 + ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1311768467750121200 + ; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200 + ; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_MOV_B]] + %0:sreg_64 = S_MOV_B64 1311768467750121200 + %1:sreg_64 = COPY killed %0 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_sreg_64_lo16_to_sgpr_lo16 +body: | + bb.0: + + ; GCN-LABEL: name: fold_sreg_64_lo16_to_sgpr_lo16 + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 1 + ; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0_lo16 + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585 + $sgpr0_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG $sgpr0_lo16 + +... 
+ +--- +name: fold_sreg_64_hi16_to_sgpr_lo16 +body: | + bb.0: + + ; GCN-LABEL: name: fold_sreg_64_hi16_to_sgpr_lo16 + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 2 + ; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0_lo16 + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585 + $sgpr0_lo16 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG $sgpr0_lo16 + +... + +--- +name: fold_sreg_64_sub1_lo16_to_sgpr_lo16 +body: | + bb.0: + + ; GCN-LABEL: name: fold_sreg_64_sub1_lo16_to_sgpr_lo16 + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 3 + ; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0_lo16 + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585 + $sgpr0_lo16 = COPY killed %0.sub1_lo16 + SI_RETURN_TO_EPILOG $sgpr0_lo16 + +... + +--- +name: fold_sreg_64_sub1_hi16_to_sgpr_lo16 +body: | + bb.0: + + ; GCN-LABEL: name: fold_sreg_64_sub1_hi16_to_sgpr_lo16 + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 4 + ; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0_lo16 + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585 + $sgpr0_lo16 = COPY killed %0.sub1_hi16 + SI_RETURN_TO_EPILOG $sgpr0_lo16 + +... + +--- +name: fmac_sreg_64_sub0_src0_to_fmamk +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: fmac_sreg_64_sub0_src0_to_fmamk + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = V_FMAMK_F32 [[DEF]], 2882399984, [[DEF1]], implicit $mode, implicit $exec + ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F32_]] + %0:vgpr_32 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200 + %3:vgpr_32 = V_FMAC_F32_e64 0, %2.sub0, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec + SI_RETURN_TO_EPILOG %3 +... 
+ +--- +name: fmac_sreg_64_sub1_src0_to_fmamk +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: fmac_sreg_64_sub1_src0_to_fmamk + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = V_FMAMK_F32 [[DEF]], 305419896, [[DEF1]], implicit $mode, implicit $exec + ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F32_]] + %0:vgpr_32 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200 + %3:vgpr_32 = V_FMAC_F32_e64 0, %2.sub1, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec + SI_RETURN_TO_EPILOG %3 +... + +--- +name: fmac_sreg_64_sub1_src1_to_fmaak +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: fmac_sreg_64_sub1_src1_to_fmaak + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = V_FMAMK_F32 [[DEF]], 305419896, [[DEF1]], implicit $mode, implicit $exec + ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F32_]] + %0:vgpr_32 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200 + %3:vgpr_32 = V_FMAC_F32_e64 0, %0, 0, %2.sub1, 0, %1, 0, 0, implicit $mode, implicit $exec + SI_RETURN_TO_EPILOG %3 +... + +--- +name: fma_sreg_64_sub0_to_fmaak +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: fma_sreg_64_sub0_to_fmaak + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = V_FMAAK_F32 [[DEF]], [[DEF1]], 2882399984, implicit $mode, implicit $exec + ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F32_]] + %0:vgpr_32 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200 + %3:vgpr_32 = V_FMA_F32_e64 0, %0, 0, %1, 0, %2.sub0, 0, 0, implicit $mode, implicit $exec + SI_RETURN_TO_EPILOG %3 +... 
+ +--- +name: fma_sreg_64_sub1_to_fmaak +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: fma_sreg_64_sub1_to_fmaak + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = V_FMAAK_F32 [[DEF]], [[DEF1]], 305419896, implicit $mode, implicit $exec + ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F32_]] + %0:vgpr_32 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200 + %3:vgpr_32 = V_FMA_F32_e64 0, %0, 0, %1, 0, %2.sub1, 0, 0, implicit $mode, implicit $exec + SI_RETURN_TO_EPILOG %3 +... diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll index a462c19ce645d..17b387be79258 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -573,8 +573,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX900-NEXT: v_mov_b32_e32 v3, s35 ; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, s34, v1 ; GFX900-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v3, vcc -; GFX900-NEXT: s_movk_i32 s0, 0x5000 -; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, s0, v1 +; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, 0x5000, v1 ; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc ; GFX900-NEXT: s_movk_i32 s2, 0x7f @@ -805,8 +804,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX90A-NEXT: v_mov_b32_e32 v2, s35 ; GFX90A-NEXT: v_add_co_u32_e32 v1, vcc, s34, v1 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v2, vcc -; GFX90A-NEXT: s_movk_i32 s0, 0x5000 -; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, s0, v1 +; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x5000, v1 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX90A-NEXT: s_movk_i32 s2, 0x7f ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], 0, 0 From 761c9dd92789b744607dc9c8c5071fef340fd86f Mon Sep 17 00:00:00 2001 
From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> Date: Tue, 17 Oct 2023 10:54:44 -0700 Subject: [PATCH 366/720] [mlir][sparse] implementating stageSparseOpPass as an interface (#69022) --- .../Dialect/SparseTensor/IR/CMakeLists.txt | 6 + .../Dialect/SparseTensor/IR/SparseTensor.h | 1 + .../SparseTensor/IR/SparseTensorInterfaces.h | 31 +++ .../SparseTensor/IR/SparseTensorInterfaces.td | 45 ++++ .../SparseTensor/IR/SparseTensorOps.td | 18 +- .../Dialect/SparseTensor/IR/CMakeLists.txt | 1 + .../SparseTensor/IR/SparseTensorDialect.cpp | 31 ++- .../IR/SparseTensorInterfaces.cpp | 55 +++++ .../Transforms/SparseTensorRewriting.cpp | 225 +++++++----------- .../Transforms/StageSparseOperations.cpp | 53 +---- .../llvm-project-overlay/mlir/BUILD.bazel | 30 +++ 11 files changed, 299 insertions(+), 197 deletions(-) create mode 100644 mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.h create mode 100644 mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td create mode 100644 mlir/lib/Dialect/SparseTensor/IR/SparseTensorInterfaces.cpp diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/SparseTensor/IR/CMakeLists.txt index 25a2e4869cc78..54ad9491cce51 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/CMakeLists.txt @@ -12,3 +12,9 @@ set(LLVM_TARGET_DEFINITIONS SparseTensorTypes.td) mlir_tablegen(SparseTensorTypes.h.inc -gen-typedef-decls) mlir_tablegen(SparseTensorTypes.cpp.inc -gen-typedef-defs) add_public_tablegen_target(MLIRSparseTensorTypesIncGen) + +set(LLVM_TARGET_DEFINITIONS SparseTensorInterfaces.td) +mlir_tablegen(SparseTensorInterfaces.h.inc -gen-op-interface-decls) +mlir_tablegen(SparseTensorInterfaces.cpp.inc -gen-op-interface-defs) +add_public_tablegen_target(MLIRSparseTensorInterfacesIncGen) +add_dependencies(mlir-headers MLIRSparseTensorInterfacesIncGen) diff --git 
a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h index 3eb9ce010cb00..cbca0a7f8cc0e 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h @@ -11,6 +11,7 @@ #include "mlir/Bytecode/BytecodeOpInterface.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.h new file mode 100644 index 0000000000000..ebbc522123a59 --- /dev/null +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.h @@ -0,0 +1,31 @@ +//===- SparseTensorInterfaces.h - sparse tensor operations +//interfaces-------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_SPARSETENSOR_IR_SPARSETENSORINTERFACES_H_ +#define MLIR_DIALECT_SPARSETENSOR_IR_SPARSETENSORINTERFACES_H_ + +#include "mlir/IR/OpDefinition.h" + +namespace mlir { +class PatternRewriter; + +namespace sparse_tensor { +class StageWithSortSparseOp; + +namespace detail { +LogicalResult stageWithSortImpl(sparse_tensor::StageWithSortSparseOp op, + PatternRewriter &rewriter); +} // namespace detail +} // namespace sparse_tensor +} // namespace mlir + +/// Include the generated interface declarations. 
+#include "mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.h.inc" + +#endif // MLIR_DIALECT_SPARSETENSOR_IR_SPARSETENSORINTERFACES_H_ diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td new file mode 100644 index 0000000000000..1379363ff75f4 --- /dev/null +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td @@ -0,0 +1,45 @@ +//===- SparseTensorInterfaces.td --------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SPARSETENSOR_IR_SPARSETENSORINTERFACES +#define SPARSETENSOR_IR_SPARSETENSORINTERFACES + +include "mlir/IR/OpBase.td" + +def StageWithSortSparseOpInterface : OpInterface<"StageWithSortSparseOp"> { + let description = [{ + A stage-with-sort sparse tensor operation is an operation that produces + unordered intermediate output. An extra sort is required to obtain the final + ordered result. + + E.g., convert csr -> csc need to be implemented as + convert csr -> unordered coo -> sort by column -> csc; and + concatenate csr, csc -> csr can be staged into + concatenate csr, csr -> unordered coo -> sort by row -> csr. 
+ }]; + let cppNamespace = "::mlir::sparse_tensor"; + let methods = [ + InterfaceMethod< + /*desc=*/"Return true if the operation needs an extra sort to produce the final result.", + /*retTy=*/"bool", + /*methodName=*/"needsExtraSort", + /*args=*/(ins), + /*methodBody=*/"">, + InterfaceMethod< + /*desc=*/"Stage the operation, return the final result value after staging.", + /*retTy=*/"::mlir::LogicalResult", + /*methodName=*/"stageWithSort", + /*args=*/(ins "::mlir::PatternRewriter &":$rewriter), + /*methodBody=*/[{ + return detail::stageWithSortImpl($_op, rewriter); + }]>, + ]; +} + + +#endif // SPARSETENSOR_IR_SPARSETENSORINTERFACES diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index 9016634fa3be8..3d1807094797e 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -12,6 +12,7 @@ include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td" include "mlir/Dialect/SparseTensor/IR/SparseTensorBase.td" include "mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td" +include "mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" @@ -153,7 +154,7 @@ def SparseTensor_DisassembleOp : SparseTensor_Op<"disassemble", [Pure, SameVaria } def SparseTensor_ConvertOp : SparseTensor_Op<"convert", - [Pure]>, + [Pure, StageWithSortSparseOpInterface]>, Arguments<(ins AnyTensor:$source)>, Results<(outs AnyTensor:$dest)> { string summary = "Converts between different tensor types"; @@ -197,9 +198,9 @@ def SparseTensor_ConvertOp : SparseTensor_Op<"convert", }]; let extraClassDeclaration = [{ - // Whether the convert can be done by a single step (either a sort or a foreach), - // or it would require a tmp buffer (sort, then foreach). 
- bool directConvertable(); + // Whether the convert can be done by a single step or it would require + // an extra sort. Inherited from StageWithSortSparseOpInterface. + bool needsExtraSort(); }]; let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)"; @@ -334,7 +335,8 @@ def SparseTensor_NumberOfEntriesOp : SparseTensor_Op<"number_of_entries", [Pure] let assemblyFormat = "$tensor attr-dict `:` type($tensor)"; } -def SparseTensor_ConcatenateOp : SparseTensor_Op<"concatenate", [Pure]>, +def SparseTensor_ConcatenateOp : SparseTensor_Op<"concatenate", + [Pure, StageWithSortSparseOpInterface]>, Arguments<(ins Variadic:$inputs, DimensionAttr:$dimension)>, Results<(outs AnyRankedTensor:$result)> { @@ -357,6 +359,12 @@ def SparseTensor_ConcatenateOp : SparseTensor_Op<"concatenate", [Pure]>, ``` }]; + let extraClassDeclaration = [{ + // Whether the concatenate can be done by a single step or it would require + // an extra sort. Inherited from StageWithSortSparseOpInterface. 
+ bool needsExtraSort(); + }]; + let assemblyFormat = "$inputs attr-dict `:` type($inputs) `to` type($result)"; let hasVerifier = 1; } diff --git a/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt index b22194d45062a..dd6f1037f71b5 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt @@ -29,6 +29,7 @@ endif() add_mlir_dialect_library(MLIRSparseTensorDialect SparseTensorDialect.cpp + SparseTensorInterfaces.cpp Detail/Var.cpp Detail/DimLvlMap.cpp Detail/LvlTypeParser.cpp diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 61522fb0dcd24..cd1e585438dda 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -1065,18 +1065,18 @@ OpFoldResult ConvertOp::fold(FoldAdaptor adaptor) { return {}; } -bool ConvertOp::directConvertable() { +bool ConvertOp::needsExtraSort() { SparseTensorType srcStt = getSparseTensorType(getSource()); SparseTensorType dstStt = getSparseTensorType(getDest()); - // We can always directly convert to unordered sparse tensor or dense tensor - // since dense tensor support random access. + // We do not need an extra sort when returning unordered sparse tensors or + // dense tensor since dense tensor support random access. if (dstStt.isAllDense() || !dstStt.isAllOrdered()) - return true; + return false; if (srcStt.isAllOrdered() && dstStt.isAllOrdered() && srcStt.hasSameDimToLvl(dstStt)) { - return true; + return false; } // Source and dest tensors are ordered in different ways. We only do direct @@ -1086,9 +1086,9 @@ bool ConvertOp::directConvertable() { // performance. 
if (auto constOp = getSource().getDefiningOp()) if (isa(constOp.getValue())) - return true; + return false; - return false; + return true; } LogicalResult ToPositionsOp::verify() { @@ -1248,6 +1248,23 @@ LogicalResult UnaryOp::verify() { return success(); } +bool ConcatenateOp::needsExtraSort() { + SparseTensorType dstStt = getSparseTensorType(*this); + if (dstStt.isAllDense() || !dstStt.isAllOrdered()) + return false; + + bool allSameOrdered = llvm::all_of(getInputs(), [dstStt](Value op) { + return getSparseTensorType(op).hasSameDimToLvl(dstStt); + }); + // TODO: When conDim != 0, as long as conDim corresponding to the first level + // in all input/output buffers, and all input/output buffers have the same + // dimToLvl, the tmp COO buffer is still unnecessary (e.g, concatenate + // CSC matrices along column). + bool directLowerable = + allSameOrdered && getDimension() == 0 && dstStt.isIdentity(); + return !directLowerable; +} + LogicalResult ConcatenateOp::verify() { const auto dstTp = getSparseTensorType(*this); const Dimension concatDim = getDimension(); diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorInterfaces.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorInterfaces.cpp new file mode 100644 index 0000000000000..d8769eacc44f3 --- /dev/null +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorInterfaces.cpp @@ -0,0 +1,55 @@ +//===- SparseTensorInterfaces.cpp - SparseTensor interfaces impl ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" +#include "mlir/IR/PatternMatch.h" + +using namespace mlir; +using namespace mlir::sparse_tensor; + +#include "mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.cpp.inc" + +LogicalResult +sparse_tensor::detail::stageWithSortImpl(StageWithSortSparseOp op, + PatternRewriter &rewriter) { + if (!op.needsExtraSort()) + return failure(); + + Location loc = op.getLoc(); + Type finalTp = op->getOpResult(0).getType(); + SparseTensorType dstStt(finalTp.cast()); + + Type srcCOOTp = getCOOFromTypeWithOrdering( + dstStt.getRankedTensorType(), dstStt.getDimToLvl(), /*ordered=*/false); + + // Clones the original operation but changing the output to an unordered COO. + Operation *cloned = rewriter.clone(*op.getOperation()); + rewriter.updateRootInPlace(cloned, [cloned, srcCOOTp]() { + cloned->getOpResult(0).setType(srcCOOTp); + }); + Value srcCOO = cloned->getOpResult(0); + + // -> sort + Type dstCOOTp = getCOOFromTypeWithOrdering( + dstStt.getRankedTensorType(), dstStt.getDimToLvl(), /*ordered=*/true); + Value dstCOO = rewriter.create( + loc, dstCOOTp, srcCOO, SparseTensorSortKind::HybridQuickSort); + + // -> dest. + if (dstCOO.getType() == finalTp) { + rewriter.replaceOp(op, dstCOO); + } else { + // Need an extra conversion if the target type is not COO. + rewriter.replaceOpWithNewOp(op, finalTp, dstCOO); + } + // TODO: deallocate extra COOs, we should probably delegate it to buffer + // deallocation pass. 
+ return success(); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index a1ab2495f5f7b..1bfee3aa1d7ee 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -829,10 +829,56 @@ struct ReshapeRewriter : public OpRewritePattern { } }; +struct TensorLike { + TensorLike(OpBuilder &builder, Location loc, RankedTensorType rtt, + ValueRange sizes) + : isSparse(rtt.getEncoding() != nullptr) { + SmallVector dynSzs; + getDynamicSizes(rtt, sizes, dynSzs); + + if (isSparse) + val = builder.create(loc, rtt, dynSzs); + else + val = allocDenseTensor(builder, loc, rtt, sizes); + }; + + void insertOrStore(OpBuilder &builder, Location loc, Value v, + ValueRange crds) { + if (isSparse) + val = builder.create(loc, v, val, crds); + else + builder.create(loc, v, val, crds); + } + + Value getSSA() const { + // We don't need to maintain the SSA chain for a memref value. + return isSparse ? val : nullptr; + } + + Value finalize(OpBuilder &builder, Location loc, RankedTensorType rtp) const { + if (isSparse) + return builder.create(loc, val, true); + return builder.create(loc, rtp, val); + } + + void updateSSA(Value v) { + // Dense memref is a non-SSA value. + assert(isSparse); + val = v; + } + +private: + bool isSparse; + Value val; // either a memref (for dense tensor) or a sparse tensor. 
+}; + struct ConcatenateRewriter : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(ConcatenateOp op, PatternRewriter &rewriter) const override { + if (op.needsExtraSort()) + op.emitError("ConcatenateOp not staged"); + const Location loc = op.getLoc(); const auto dstTp = getSparseTensorType(op); const Dimension dimRank = dstTp.getDimRank(); @@ -852,94 +898,54 @@ struct ConcatenateRewriter : public OpRewritePattern { // foreach in %s1 : insert d0, d1, %tmp // foreach in %s2 : insert d0, d1 + size(s1), %tmp // foreach in %s3 : insert d0, d1 + size(s1) + size(s2), %tmp - // %t = convert_to_dest_tensor(%tmp) - // - // NOTE: this cannot be `const` because it will be changed when - // `needTmpCOO`, but that's buried in the conditional below and - // thus not easily extracted. - auto encDst = dstTp.getEncoding(); - Value dst; // Destination tensor for inserting source tensor values. - bool needTmpCOO = true; - const bool allDense = dstTp.hasEncoding() && dstTp.isAllDense(); - Value annotatedDenseDst; - if (dstTp.hasEncoding()) { - bool allOrdered = false; - // When concatenating on dimension 0, and all inputs are sorted - // and have an identity dimToLvl, the concatenate will generate - // coords in lexOrder thus no need for the tmp COO buffer. - // TODO: When conDim != 0, as long as conDim is the first dimension - // in all input/output buffers, and all input/output buffers have the same - // dimToLvl, the tmp COO buffer is still unnecessary (e.g, concatenate - // CSC matrices along column). - if (!allDense && conDim == 0 && dstTp.isIdentity()) { - for (auto i : op.getInputs()) { - const auto stt = getSparseTensorType(i); - allOrdered = stt.isAllOrdered() && stt.isIdentity(); - if (!allOrdered) - break; - } - } - - needTmpCOO = !allDense && !allOrdered; - const RankedTensorType tp = getBufferType(dstTp, needTmpCOO); - encDst = needTmpCOO ? 
getSparseTensorEncoding(tp) : encDst; - SmallVector dynSizes; - getDynamicSizes(dstTp, sizes, dynSizes); - dst = rewriter.create(loc, tp, dynSizes).getResult(); - if (allDense) { - // Create a view of the values buffer to match the unannotated dense - // tensor. - Value valuesBuffer = genToValues(rewriter, loc, dst); - Value dimCoords = - genAlloca(rewriter, loc, dimRank, rewriter.getIndexType(), - /*staticShape=*/true); - annotatedDenseDst = dst; - dst = reshapeValuesToLevels(rewriter, loc, encDst, sizes, valuesBuffer, - dimCoords); - } - } else { - // TODO: Dense buffers should be allocated/deallocated via the callback - // in BufferizationOptions. - dst = allocDenseTensor(rewriter, loc, dstTp, sizes); - } + TensorLike dstBuf(rewriter, loc, dstTp.getRankedTensorType(), sizes); Value offset = constantIndex(rewriter, loc, 0); - SmallVector initArgs; - if (encDst && !allDense) - initArgs.push_back(dst); + Value iterArg = dstBuf.getSSA(); + ForeachOp foreachOp; for (Value input : op.getInputs()) { - // Build a for op for each input tensor to append new values into the + // Builds a for op for each input tensor to append new values into the // output tensor. foreachOp = rewriter.create( - loc, input, initArgs, + loc, input, iterArg ? ValueRange{iterArg} : ValueRange{}, [&](OpBuilder &builder, Location loc, ValueRange dcvs, Value v, ValueRange reduc) { SmallVector dstLcvs(dstTp.getLvlRank()); for (Dimension d = 0; d < dimRank; d++) { Value crd = dcvs[d]; + // Transforms coordinates for the concatenating dim. if (d == conDim) - // Transform coordinates for the concatenating dim. 
crd = builder.create(loc, crd, offset); // FIXME: `toStoredDim` is deprecated - dstLcvs[toStoredDim(encDst, d)] = crd; + dstLcvs[toStoredDim(dstTp.getEncoding(), d)] = crd; } - if (encDst && !allDense) { - Value cond = genIsNonzero(rewriter, loc, v); - scf::IfOp ifOp = builder.create( - loc, TypeRange(reduc.front().getType()), cond, /*else*/ true); + + if (!reduc.empty()) + dstBuf.updateSSA(reduc.front()); + + if (!dstTp.isAllDense()) { + Value cond = genIsNonzero(builder, loc, v); + auto ifOp = builder.create(loc, reduc.getTypes(), cond, + /*else*/ true); + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + builder.create(loc, dstBuf.getSSA()); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - Value t = - builder.create(loc, v, reduc.front(), dstLcvs); - rewriter.create(loc, t); - rewriter.setInsertionPointToStart(&ifOp.getElseRegion().front()); - rewriter.create(loc, reduc.front()); - rewriter.setInsertionPointAfter(ifOp); - rewriter.create(loc, ifOp.getResult(0)); + dstBuf.insertOrStore(builder, loc, v, dstLcvs); + builder.create(loc, dstBuf.getSSA()); + + // Exits the ifOp, update the sparse tensor SSA value. + builder.setInsertionPointAfter(ifOp); + assert(!reduc.empty()); + dstBuf.updateSSA(ifOp.getResult(0)); } else { - builder.create(loc, v, dst, dstLcvs); - builder.create(loc); + dstBuf.insertOrStore(builder, loc, v, dstLcvs); } + if (reduc.empty()) + builder.create(loc); + else + builder.create(loc, dstBuf.getSSA()); }); // Accumulates the offset. 
Note that only static-shaped inputs are allowed // by concatenate op verifier, which saves us from computing the offset @@ -948,88 +954,27 @@ struct ConcatenateRewriter : public OpRewritePattern { assert(sh.has_value()); offset = rewriter.create( loc, offset, constantIndex(rewriter, loc, *sh)); - if (encDst && !allDense) { - dst = foreachOp.getResult(0); - initArgs[0] = dst; - } - } - // Temp variable to avoid needing to call `getRankedTensorType` - // in the three use-sites below. - const RankedTensorType dstRTT = dstTp; - if (!encDst) { - rewriter.replaceOpWithNewOp(op, dstRTT, dst); - } else if (allDense) { - rewriter.replaceOp( - op, rewriter.create(loc, dstRTT, annotatedDenseDst) - .getResult()); - } else { - dst = rewriter.create(loc, dst, true); - if (needTmpCOO) { - Value tmpCoo = dst; - Type dstCooTp = getCOOType(dstRTT, true); - // TODO: this should be a sort_coo operation. - dst = rewriter - .create(loc, dstCooTp, tmpCoo, - SparseTensorSortKind::HybridQuickSort) - .getResult(); - dst = rewriter.create(loc, dstRTT, dst).getResult(); - rewriter.create(loc, tmpCoo); + if (!foreachOp.getResults().empty()) { + iterArg = foreachOp.getResult(0); + dstBuf.updateSSA(iterArg); } - rewriter.replaceOp(op, dst); } - return success(); - } -}; -struct TensorLike { - TensorLike(OpBuilder &builder, Location loc, RankedTensorType rtt, - ValueRange sizes) - : isSparse(rtt.getEncoding() != nullptr) { - SmallVector dynSzs; - getDynamicSizes(rtt, sizes, dynSzs); - - if (isSparse) - val = builder.create(loc, rtt, dynSzs); - else - val = allocDenseTensor(builder, loc, rtt, sizes); - }; - - void insertOrStore(OpBuilder &builder, Location loc, Value v, - ValueRange crds) { - if (isSparse) - val = builder.create(loc, v, val, crds); - else - builder.create(loc, v, val, crds); - } - - Value getSSA() const { - // We don't need to maintain the SSA chain for a memref value. - return isSparse ? 
val : nullptr; - } - - Value finalize(OpBuilder &builder, Location loc, RankedTensorType rtp) const { - if (isSparse) - return builder.create(loc, val, true); - return builder.create(loc, rtp, val); - } + if (!foreachOp.getResults().empty()) + dstBuf.updateSSA(iterArg); - void updateSSA(Value v) { - // Dense memref is a non-SSA value. - assert(isSparse); - val = v; + Value ret = dstBuf.finalize(rewriter, loc, dstTp.getRankedTensorType()); + rewriter.replaceOp(op, ret); + return success(); } - -private: - bool isSparse; - Value val; // either a memref (for dense tensor) or a sparse tensor. }; struct DirectConvertRewriter : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(ConvertOp op, PatternRewriter &rewriter) const override { - if (!op.directConvertable()) + if (op.needsExtraSort()) return op.emitError("ConvertOp not staged."); // TODO: Maybe we want a different operation for this too. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp index 4c163ea6e067b..5875cd4f9fd9d 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp @@ -15,56 +15,19 @@ using namespace mlir::sparse_tensor; namespace { -struct StageUnorderedConvert : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +template +struct StageUnorderedSparseOps : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(ConvertOp op, + LogicalResult matchAndRewrite(StageWithSortOp op, PatternRewriter &rewriter) const override { - // TODO: Implement it as an Interface, this can be reused from other - // operations too (e.g., concatenate, reshape, etc). 
- if (op.directConvertable()) - return failure(); - - Location loc = op.getLoc(); - SparseTensorType srcStt = getSparseTensorType(op.getSource()); - SparseTensorType dstStt = getSparseTensorType(op.getDest()); - - // Just to make sure that convert to dense tensor is always direct. - assert(!dstStt.isAllDense()); - - // source -> coo - // The tmp COO must be unordered, otherwise it is a direct conversion. - assert(!(srcStt.hasSameDimToLvl(dstStt) && srcStt.isAllOrdered())); - (void)srcStt; // to silence warning when assertion is disabled - - Type srcCOOTp = getCOOFromTypeWithOrdering( - dstStt.getRankedTensorType(), dstStt.getDimToLvl(), /*ordered=*/false); - Value srcCOO = op.getSource(); - if (srcCOO.getType() != srcCOOTp) - srcCOO = rewriter.create(loc, srcCOOTp, op.getSource()); - - // -> sort - Type dstCOOTp = getCOOFromTypeWithOrdering( - dstStt.getRankedTensorType(), dstStt.getDimToLvl(), /*ordered=*/true); - Value dstCOO = rewriter.create( - loc, dstCOOTp, srcCOO, SparseTensorSortKind::HybridQuickSort); - - // -> dest. - if (dstCOO.getType() == op.getType()) { - rewriter.replaceOp(op, dstCOO); - } else { - // Need an extra conversion if the target type is not COO. - rewriter.replaceOpWithNewOp(op, op.getDest().getType(), - dstCOO); - } - // TODO: deallocate extra COOs, we should probably delegate it to buffer - // deallocation pass. 
- - return success(); + return llvm::cast(op.getOperation()) + .stageWithSort(rewriter); } }; } // namespace void mlir::populateStageSparseOperationsPatterns(RewritePatternSet &patterns) { - patterns.add(patterns.getContext()); + patterns.add, + StageUnorderedSparseOps>(patterns.getContext()); } diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 09cf01e73ed8c..eb670ad50163c 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -2683,6 +2683,7 @@ td_library( srcs = [ "include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td", "include/mlir/Dialect/SparseTensor/IR/SparseTensorBase.td", + "include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td", "include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td", "include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td", ], @@ -2694,6 +2695,15 @@ td_library( ], ) +td_library( + name = "SparseTensorInterfacesTdFiles", + srcs = [ + "include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td", + ], + includes = ["include"], + deps = [":OpBaseTdFiles"], +) + gentbl_cc_library( name = "SparseTensorAttrDefsIncGen", tbl_outs = [ @@ -2801,6 +2811,23 @@ gentbl_cc_library( deps = [":PassBaseTdFiles"], ) +gentbl_cc_library( + name = "SparseTensorInterfacesIncGen", + tbl_outs = [ + ( + ["-gen-op-interface-decls"], + "include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.h.inc", + ), + ( + ["-gen-op-interface-defs"], + "include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.cpp.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td", + deps = [":SparseTensorInterfacesTdFiles"], +) + # This library is shared by both SparseTensorDialect and # SparseTensorRuntime, so it must not depend on any of the MLIR/LLVM # internals or else mlir_c_runner_utils will inherit that dependency. 
@@ -2823,9 +2850,11 @@ cc_library( "lib/Dialect/SparseTensor/IR/Detail/Var.cpp", "lib/Dialect/SparseTensor/IR/Detail/Var.h", "lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp", + "lib/Dialect/SparseTensor/IR/SparseTensorInterfaces.cpp", ], hdrs = [ "include/mlir/Dialect/SparseTensor/IR/SparseTensor.h", + "include/mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.h", "include/mlir/Dialect/SparseTensor/IR/SparseTensorStorageLayout.h", "include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h", ], @@ -2837,6 +2866,7 @@ cc_library( ":InferTypeOpInterface", ":SparseTensorAttrDefsIncGen", ":SparseTensorEnums", + ":SparseTensorInterfacesIncGen", ":SparseTensorOpsIncGen", ":SparseTensorTypesIncGen", "//llvm:Support", From ddc30ff802eb135934fc7b785d33c05217ab9e39 Mon Sep 17 00:00:00 2001 From: Joseph Huber <35342157+jhuber6@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:02:31 -0400 Subject: [PATCH 367/720] [libc] Implement the 'ungetc' function on the GPU (#69248) Summary: This function follows closely with the pattern of all the other functions. That is, making a new opcode and forwarding the call to the host. However, this also required modifying the test somewhat. It seems that not all `libc` implementations follow the same error rules as are tested here, and it is not explicit in the standard, so we simply disable these EOF checks when targeting the GPU. 
--- libc/config/gpu/entrypoints.txt | 1 + libc/docs/gpu/support.rst | 1 + libc/include/llvm-libc-types/rpc_opcodes_t.h | 1 + libc/src/stdio/CMakeLists.txt | 13 +-------- libc/src/stdio/generic/CMakeLists.txt | 12 ++++++++ libc/src/stdio/{ => generic}/ungetc.cpp | 0 libc/src/stdio/gpu/CMakeLists.txt | 11 ++++++++ libc/src/stdio/gpu/ungetc.cpp | 29 ++++++++++++++++++++ libc/test/src/stdio/ungetc_test.cpp | 8 ++++++ libc/utils/gpu/server/rpc_server.cpp | 7 +++++ 10 files changed, 71 insertions(+), 12 deletions(-) rename libc/src/stdio/{ => generic}/ungetc.cpp (100%) create mode 100644 libc/src/stdio/gpu/ungetc.cpp diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index ad68216a76b94..731508088cb6f 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -104,6 +104,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.fgetc libc.src.stdio.getc libc.src.stdio.getchar + libc.src.stdio.ungetc libc.src.stdio.stdin libc.src.stdio.stdout libc.src.stdio.stderr diff --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst index fd27273ed562e..806af5f219dfb 100644 --- a/libc/docs/gpu/support.rst +++ b/libc/docs/gpu/support.rst @@ -134,6 +134,7 @@ ftell |check| |check| fflush |check| |check| fgetc |check| |check| fgets |check| |check| +ungetc |check| |check| getc |check| |check| getchar |check| |check| puts |check| |check| diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h index 61e17756fa647..2fd318f06a7db 100644 --- a/libc/include/llvm-libc-types/rpc_opcodes_t.h +++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h @@ -29,6 +29,7 @@ typedef enum { RPC_FSEEK, RPC_FTELL, RPC_FFLUSH, + RPC_UNGETC, RPC_LAST = 0xFFFF, } rpc_opcode_t; diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 169bc592dee48..380474ce27118 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -54,18 +54,6 @@ add_entrypoint_object( 
libc.src.__support.File.platform_file ) -add_entrypoint_object( - ungetc - SRCS - ungetc.cpp - HDRS - ungetc.h - DEPENDS - libc.include.stdio - libc.src.__support.File.file - libc.src.__support.File.platform_file -) - add_entrypoint_object( fopencookie SRCS @@ -286,6 +274,7 @@ add_stdio_entrypoint_object(getc_unlocked) add_stdio_entrypoint_object(getchar) add_stdio_entrypoint_object(getchar_unlocked) add_stdio_entrypoint_object(fgets) +add_stdio_entrypoint_object(ungetc) add_stdio_entrypoint_object(stdin) add_stdio_entrypoint_object(stdout) add_stdio_entrypoint_object(stderr) diff --git a/libc/src/stdio/generic/CMakeLists.txt b/libc/src/stdio/generic/CMakeLists.txt index 282d056bba712..2ecef879eb4bb 100644 --- a/libc/src/stdio/generic/CMakeLists.txt +++ b/libc/src/stdio/generic/CMakeLists.txt @@ -342,6 +342,18 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) +add_entrypoint_object( + ungetc + SRCS + ungetc.cpp + HDRS + ../ungetc.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( stdin SRCS diff --git a/libc/src/stdio/ungetc.cpp b/libc/src/stdio/generic/ungetc.cpp similarity index 100% rename from libc/src/stdio/ungetc.cpp rename to libc/src/stdio/generic/ungetc.cpp diff --git a/libc/src/stdio/gpu/CMakeLists.txt b/libc/src/stdio/gpu/CMakeLists.txt index 047b68931bce5..1b1e2a903cc0b 100644 --- a/libc/src/stdio/gpu/CMakeLists.txt +++ b/libc/src/stdio/gpu/CMakeLists.txt @@ -251,6 +251,17 @@ add_entrypoint_object( .ferror ) +add_entrypoint_object( + ungetc + SRCS + ungetc.cpp + HDRS + ../ungetc.h + DEPENDS + libc.include.stdio + .gpu_file +) + add_entrypoint_object( stdin SRCS diff --git a/libc/src/stdio/gpu/ungetc.cpp b/libc/src/stdio/gpu/ungetc.cpp new file mode 100644 index 0000000000000..373164a0c53a3 --- /dev/null +++ b/libc/src/stdio/gpu/ungetc.cpp @@ -0,0 +1,29 @@ +//===-- Implementation of ungetc ------------------------------------------===// +// +// Part of the 
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/ungetc.h" +#include "file.h" + +#include + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, ungetc, (int c, ::FILE *stream)) { + int ret; + rpc::Client::Port port = rpc::client.open(); + port.send_and_recv( + [=](rpc::Buffer *buffer) { + buffer->data[0] = c; + buffer->data[1] = file::from_stream(stream); + }, + [&](rpc::Buffer *buffer) { ret = static_cast(buffer->data[0]); }); + port.close(); + return ret; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/stdio/ungetc_test.cpp b/libc/test/src/stdio/ungetc_test.cpp index 75eecc87ef265..c98995ff0811b 100644 --- a/libc/test/src/stdio/ungetc_test.cpp +++ b/libc/test/src/stdio/ungetc_test.cpp @@ -24,12 +24,16 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) { constexpr size_t CONTENT_SIZE = sizeof(CONTENT); ASSERT_EQ(CONTENT_SIZE, LIBC_NAMESPACE::fwrite(CONTENT, 1, CONTENT_SIZE, file)); +#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations. // Cannot unget to an un-readable file. ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc('1', file)); +#endif ASSERT_EQ(0, LIBC_NAMESPACE::fclose(file)); file = LIBC_NAMESPACE::fopen(FILENAME, "r+"); ASSERT_FALSE(file == nullptr); + // Calling with an EOF should always return EOF without doing anything. + ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc(EOF, file)); char c; ASSERT_EQ(LIBC_NAMESPACE::fread(&c, 1, 1, file), size_t(1)); ASSERT_EQ(c, CONTENT[0]); @@ -43,8 +47,10 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) { // ungetc should not fail after a seek operation. int unget_char = 'z'; ASSERT_EQ(unget_char, LIBC_NAMESPACE::ungetc(unget_char, file)); +#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations. // Another unget should fail. 
ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc(unget_char, file)); +#endif // ungetting a char at the beginning of the file will allow us to fetch // one additional character. char new_data[CONTENT_SIZE + 1]; @@ -53,8 +59,10 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) { ASSERT_STREQ("zabcdef", new_data); ASSERT_EQ(size_t(1), LIBC_NAMESPACE::fwrite("x", 1, 1, file)); +#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations. // unget should fail after a write operation. ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc('1', file)); +#endif ASSERT_EQ(0, LIBC_NAMESPACE::fclose(file)); } diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 1c1c9f1ae9e6b..0550115f7cd1a 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -186,6 +186,13 @@ struct Server { }); break; } + case RPC_UNGETC: { + port->recv_and_send([](rpc::Buffer *buffer) { + buffer->data[0] = ungetc(static_cast(buffer->data[0]), + file::to_stream(buffer->data[1])); + }); + break; + } case RPC_NOOP: { port->recv([](rpc::Buffer *) {}); break; From b33723710f5194080e8bfab9f21c8445647c976b Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Tue, 17 Oct 2023 11:06:11 -0700 Subject: [PATCH 368/720] [NVPTX] Fixed few more corner cases for v4i8 lowering. 
(#69263) Fixes https://github.com/llvm/llvm-project/issues/69124 --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 22 ++- llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 6 + llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 3 + llvm/test/CodeGen/NVPTX/f16x2-instructions.ll | 6 +- llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 154 ++++++++++++++++++ llvm/test/CodeGen/NVPTX/param-load-store.ll | 5 - 6 files changed, 180 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 36da2e7b40efa..a935c0e16a552 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -504,13 +504,21 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // Only logical ops can be done on v4i8 directly, others must be done // elementwise. setOperationAction( - {ISD::ADD, ISD::MUL, ISD::ABS, ISD::SMIN, - ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::CTPOP, - ISD::CTLZ, ISD::ADD, ISD::SUB, ISD::MUL, - ISD::SHL, ISD::SREM, ISD::UREM, ISD::SDIV, - ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS, - ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, - ISD::UINT_TO_FP}, + {ISD::ABS, ISD::ADD, ISD::ADDC, ISD::ADDE, + ISD::BITREVERSE, ISD::CTLZ, ISD::CTPOP, ISD::CTTZ, + ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FSHL, ISD::FSHR, + ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::PARITY, + ISD::ROTL, ISD::ROTR, ISD::SADDO, ISD::SADDO_CARRY, + ISD::SADDSAT, ISD::SDIV, ISD::SDIVREM, ISD::SELECT_CC, + ISD::SETCC, ISD::SHL, ISD::SINT_TO_FP, ISD::SMAX, + ISD::SMIN, ISD::SMULO, ISD::SMUL_LOHI, ISD::SRA, + ISD::SREM, ISD::SRL, ISD::SSHLSAT, ISD::SSUBO, + ISD::SSUBO_CARRY, ISD::SSUBSAT, ISD::SUB, ISD::SUBC, + ISD::SUBE, ISD::UADDO, ISD::UADDO_CARRY, ISD::UADDSAT, + ISD::UDIV, ISD::UDIVREM, ISD::UINT_TO_FP, ISD::UMAX, + ISD::UMIN, ISD::UMULO, ISD::UMUL_LOHI, ISD::UREM, + ISD::USHLSAT, ISD::USUBO, ISD::USUBO_CARRY, ISD::VSELECT, + ISD::USUBSAT}, MVT::v4i8, Expand); // Operations not 
directly supported by NVPTX. diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 5c7c10965e2f2..f6932db2aeb0b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -586,6 +586,12 @@ class NVPTXTargetLowering : public TargetLowering { AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override { + // There's rarely any point of packing something into a vector type if we + // already have the source data. + return true; + } + private: const NVPTXSubtarget &STI; // cache the subtarget here SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 84ed953ad18a9..b0b96b94a1257 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -3485,6 +3485,9 @@ def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; +def: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))), + (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; + // Count leading zeros let hasSideEffects = false in { def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index 18788c776ffbd..464b3a754804f 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -1319,10 +1319,8 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768; ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]]; ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: 
mov.b32 {[[RX0:%rs[0-9]+]], [[RX1:%rs[0-9]+]]}, [[R]] -; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]]; -; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]]; +; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]]; +; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]]; ; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; ; CHECK: ret; define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index fd48313ad6848..ddad374a4dc11 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -1269,4 +1269,158 @@ define <4 x i8> @test_fptoui_2xhalf_to_2xi8(<4 x half> %a) #0 { ret <4 x i8> %r } +define void @test_srem_v4i8(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: test_srem_v4i8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<18>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ld.param.u64 %rd3, [test_srem_v4i8_param_2]; +; CHECK-NEXT: ld.param.u64 %rd2, [test_srem_v4i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_srem_v4i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: ld.u32 %r2, [%rd2]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs1, %r3; +; CHECK-NEXT: bfe.s32 %r4, %r1, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs2, %r4; +; CHECK-NEXT: rem.s16 %rs3, %rs2, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs3; +; CHECK-NEXT: bfe.s32 %r6, %r2, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs4, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs5, %r7; +; CHECK-NEXT: rem.s16 %rs6, %rs5, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r5, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 16, 8; +; CHECK-NEXT: cvt.s8.s32 %rs7, %r10; +; CHECK-NEXT: bfe.s32 %r11, %r1, 16, 8; +; CHECK-NEXT: cvt.s8.s32 %rs8, %r11; +; CHECK-NEXT: rem.s16 %rs9, %rs8, %rs7; +; CHECK-NEXT: 
cvt.u32.u16 %r12, %rs9; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r9, 16, 8; +; CHECK-NEXT: bfe.s32 %r14, %r2, 24, 8; +; CHECK-NEXT: cvt.s8.s32 %rs10, %r14; +; CHECK-NEXT: bfe.s32 %r15, %r1, 24, 8; +; CHECK-NEXT: cvt.s8.s32 %rs11, %r15; +; CHECK-NEXT: rem.s16 %rs12, %rs11, %rs10; +; CHECK-NEXT: cvt.u32.u16 %r16, %rs12; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8; +; CHECK-NEXT: st.u32 [%rd3], %r17; +; CHECK-NEXT: ret; +entry: + %t57 = load <4 x i8>, ptr %a, align 4 + %t59 = load <4 x i8>, ptr %b, align 4 + %x = srem <4 x i8> %t57, %t59 + store <4 x i8> %x, ptr %c, align 4 + ret void +} + +;; v3i8 lowering, especially for unaligned loads is terrible. We end up doing +;; tons of pointless scalar_to_vector/bitcast/extract_elt on v2i16/v4i8, which +;; is further complicated by LLVM trying to use i16 as an intermediate type, +;; because we don't have i8 registers. It's a mess. +;; Ideally we want to split it into element-wise ops, but legalizer can't handle +;; odd-sized vectors. TL;DR; don't use odd-sized vectors of v8. 
+define void @test_srem_v3i8(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: test_srem_v3i8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<20>; +; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ld.param.u64 %rd3, [test_srem_v3i8_param_2]; +; CHECK-NEXT: ld.param.u64 %rd2, [test_srem_v3i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_srem_v3i8_param_0]; +; CHECK-NEXT: ld.u8 %rs1, [%rd1]; +; CHECK-NEXT: ld.u8 %rs2, [%rd1+1]; +; CHECK-NEXT: shl.b16 %rs3, %rs2, 8; +; CHECK-NEXT: or.b16 %rs4, %rs3, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs4; +; CHECK-NEXT: ld.s8 %rs5, [%rd1+2]; +; CHECK-NEXT: ld.u8 %rs6, [%rd2]; +; CHECK-NEXT: ld.u8 %rs7, [%rd2+1]; +; CHECK-NEXT: shl.b16 %rs8, %rs7, 8; +; CHECK-NEXT: or.b16 %rs9, %rs8, %rs6; +; CHECK-NEXT: cvt.u32.u16 %r3, %rs9; +; CHECK-NEXT: ld.s8 %rs10, [%rd2+2]; +; CHECK-NEXT: bfe.s32 %r5, %r3, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs11, %r5; +; CHECK-NEXT: bfe.s32 %r6, %r1, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs12, %r6; +; CHECK-NEXT: rem.s16 %rs13, %rs12, %rs11; +; CHECK-NEXT: cvt.u32.u16 %r7, %rs13; +; CHECK-NEXT: bfe.s32 %r8, %r3, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs14, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs15, %r9; +; CHECK-NEXT: rem.s16 %rs16, %rs15, %rs14; +; CHECK-NEXT: cvt.u32.u16 %r10, %rs16; +; CHECK-NEXT: bfi.b32 %r11, %r10, %r7, 8, 8; +; CHECK-NEXT: // implicit-def: %r13 +; CHECK-NEXT: bfi.b32 %r12, %r13, %r11, 16, 8; +; CHECK-NEXT: // implicit-def: %r15 +; CHECK-NEXT: bfi.b32 %r14, %r15, %r12, 24, 8; +; CHECK-NEXT: rem.s16 %rs17, %rs5, %rs10; +; CHECK-NEXT: cvt.u16.u32 %rs18, %r14; +; CHECK-NEXT: st.u8 [%rd3], %rs18; +; CHECK-NEXT: shr.u16 %rs19, %rs18, 8; +; CHECK-NEXT: st.u8 [%rd3+1], %rs19; +; CHECK-NEXT: st.u8 [%rd3+2], %rs17; +; CHECK-NEXT: ret; +entry: + %t57 = load <3 x i8>, ptr %a, align 1 + %t59 = load <3 x i8>, ptr %b, align 1 + %x = srem <3 x i8> %t57, %t59 + store <3 x i8> %x, ptr %c, align 1 + ret void +} + 
+define void @test_sext_v4i1_to_v4i8(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: test_sext_v4i1_to_v4i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<18>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ld.param.u64 %rd3, [test_sext_v4i1_to_v4i8_param_2]; +; CHECK-NEXT: ld.param.u64 %rd2, [test_sext_v4i1_to_v4i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_sext_v4i1_to_v4i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: ld.u32 %r2, [%rd2]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r1, 24, 8; +; CHECK-NEXT: setp.hi.u32 %p1, %r4, %r3; +; CHECK-NEXT: bfe.s32 %r5, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r1, 16, 8; +; CHECK-NEXT: setp.hi.u32 %p2, %r6, %r5; +; CHECK-NEXT: bfe.s32 %r7, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r1, 8, 8; +; CHECK-NEXT: setp.hi.u32 %p3, %r8, %r7; +; CHECK-NEXT: bfe.s32 %r9, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r1, 0, 8; +; CHECK-NEXT: setp.hi.u32 %p4, %r10, %r9; +; CHECK-NEXT: selp.s32 %r11, -1, 0, %p4; +; CHECK-NEXT: selp.s32 %r12, -1, 0, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.s32 %r14, -1, 0, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.s32 %r16, -1, 0, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.u32 [%rd3], %r17; +; CHECK-NEXT: ret; +entry: + %t1 = load <4 x i8>, ptr %a, align 4 + %t2 = load <4 x i8>, ptr %b, align 4 + %t5 = icmp ugt <4 x i8> %t1, %t2 + %t6 = sext <4 x i1> %t5 to <4 x i8> + store <4 x i8> %t6, ptr %c, align 4 + ret void +} + attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index b4208c691c91d..c14dc88431d31 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -364,10 +364,6 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) { ; CHECK-NEXT: .param .align 16 .b8 
test_v5i16_param_0[16] ; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8]; ; CHECK-DAG: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0] -; CHECK-DAG: mov.b32 [[R0:%r[0-9]+]], {[[E0]], [[E1]]}; -; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[R0]]; -; CHECK-DAG: mov.b32 [[R1:%r[0-9]+]], {[[E2]], [[E3]]}; -; CHECK-DAG: mov.b32 {[[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [[R1]]; ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; @@ -496,7 +492,6 @@ define <4 x half> @test_v4f16(<4 x half> %a) { ; CHECK-LABEL: test_v5f16( ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16] ; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0]; -; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]]; ; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8]; ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK-DAG: st.param.v4.b16 [param0+0], From dd64c82cbc9c69924b5c6df059e5b065fa29d185 Mon Sep 17 00:00:00 2001 From: Haowei Date: Tue, 17 Oct 2023 11:15:46 -0700 Subject: [PATCH 369/720] [unittest] Allow LLVM unit test to run under a wrapper program. (#66821) This patch add CMake option "LLVM_GTEST_RUN_UNDER" to LLVM unittest configuration. When specified, LLVM unittest will be run under the wrapper program specified by this option. This feature can simplify the setup to run LLVM unittest on a target platform that is different than host. 
--- llvm/CMakeLists.txt | 3 +++ llvm/test/Unit/lit.cfg.py | 6 +++++- llvm/test/Unit/lit.site.cfg.py.in | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index ef2f2146a0364..82d4beea91e34 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -1219,6 +1219,9 @@ if( LLVM_INCLUDE_EXAMPLES ) endif() if( LLVM_INCLUDE_TESTS ) + set(LLVM_GTEST_RUN_UNDER + "" CACHE STRING + "Define the wrapper program that LLVM unit tests should be run under.") if(EXISTS ${LLVM_MAIN_SRC_DIR}/projects/test-suite AND TARGET clang) include(LLVMExternalProjectUtils) llvm_ExternalProject_Add(test-suite ${LLVM_MAIN_SRC_DIR}/projects/test-suite diff --git a/llvm/test/Unit/lit.cfg.py b/llvm/test/Unit/lit.cfg.py index f15c30dbcdb0a..61296d7ea0032 100644 --- a/llvm/test/Unit/lit.cfg.py +++ b/llvm/test/Unit/lit.cfg.py @@ -19,7 +19,11 @@ config.test_source_root = config.test_exec_root # testFormat: The test format to use to interpret tests. -config.test_format = lit.formats.GoogleTest(config.llvm_build_mode, "Tests") +config.test_format = lit.formats.GoogleTest( + config.llvm_build_mode, + "Tests", + run_under=config.gtest_run_under, +) # Propagate the temp directory. Windows requires this because it uses \Windows\ # if none of these are present. diff --git a/llvm/test/Unit/lit.site.cfg.py.in b/llvm/test/Unit/lit.site.cfg.py.in index 1d7d765801494..3536a34f796a2 100644 --- a/llvm/test/Unit/lit.site.cfg.py.in +++ b/llvm/test/Unit/lit.site.cfg.py.in @@ -7,6 +7,7 @@ config.llvm_obj_root = path(r"@LLVM_BINARY_DIR@") config.llvm_tools_dir = lit_config.substitute(path(r"@LLVM_TOOLS_DIR@")) config.llvm_build_mode = lit_config.substitute("@LLVM_BUILD_MODE@") config.shlibdir = lit_config.substitute(path(r"@SHLIBDIR@")) +config.gtest_run_under = lit_config.substitute(r"@LLVM_GTEST_RUN_UNDER@") # Let the main config do the real work. 
lit_config.load_config( From fd311126349b8fe1684d62154a9fa5a7bbb0b713 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 17 Oct 2023 19:17:40 +0100 Subject: [PATCH 370/720] [VPlan] Insert Trunc/Exts for reductions directly in VPlan. Update the code to create Trunc/Ext recipes directly in adjustRecipesForReductions instead of fixing it up later in fixReductions. This explicitly models the required conversions and also makes sure they are generated at the right place (instead of after the exit condition), hence the changes in a few tests. --- .../Transforms/Vectorize/LoopVectorize.cpp | 67 ++++++++++--------- .../epilog-vectorization-reductions.ll | 8 +-- .../LoopVectorize/reduction-small-size.ll | 8 +-- .../scalable-reduction-inloop.ll | 8 +-- 4 files changed, 47 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index aa435b0d47aa5..14c5c0d18a4db 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3792,8 +3792,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, State.setDebugLocFrom(I->getDebugLoc()); VPValue *LoopExitInstDef = PhiR->getBackedgeValue(); - // This is the vector-clone of the value that leaves the loop. - Type *VecTy = State.get(LoopExitInstDef, 0)->getType(); // Before each round, move the insertion point right between // the PHIs and the values we are going to write. 
@@ -3805,10 +3803,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, State.setDebugLocFrom(LoopExitInst->getDebugLoc()); Type *PhiTy = OrigPhi->getType(); - - VPBasicBlock *LatchVPBB = - PhiR->getParent()->getEnclosingLoopRegion()->getExitingBasicBlock(); - BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB]; // If tail is folded by masking, the vector value to leave the loop should be // a Select choosing between the vectorized LoopExitInst and vectorized Phi, // instead of the former. For an inloop reduction the reduction will already @@ -3834,23 +3828,12 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, // then extend the loop exit value to enable InstCombine to evaluate the // entire expression in the smaller type. if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { - assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); - Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); - Builder.SetInsertPoint(VectorLoopLatch->getTerminator()); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); - Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy) - : Builder.CreateZExt(Trunc, VecTy); - for (User *U : llvm::make_early_inc_range(RdxParts[Part]->users())) - if (U != Trunc) { - U->replaceUsesOfWith(RdxParts[Part], Extnd); - RdxParts[Part] = Extnd; - } - } Builder.SetInsertPoint(LoopMiddleBlock, LoopMiddleBlock->getFirstInsertionPt()); - for (unsigned Part = 0; Part < UF; ++Part) + Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); + for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); + } } // Reduce all of the unrolled parts into a single vector. 
@@ -9155,18 +9138,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( PreviousLink = RedRecipe; } } - - // If tail is folded by masking, introduce selects between the phi - // and the live-out instruction of each reduction, at the beginning of the - // dedicated latch block. - if (CM.foldTailByMasking()) { Builder.setInsertPoint(&*LatchVPBB->begin()); for (VPRecipeBase &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - VPReductionPHIRecipe *PhiR = dyn_cast(&R); - if (!PhiR || PhiR->isInLoop()) - continue; - const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + VPReductionPHIRecipe *PhiR = dyn_cast(&R); + if (!PhiR || PhiR->isInLoop()) + continue; + + const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe(); + // If tail is folded by masking, introduce selects between the phi + // and the live-out instruction of each reduction, at the beginning of the + // dedicated latch block. + if (CM.foldTailByMasking()) { VPValue *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), *Plan); VPValue *Red = PhiR->getBackedgeValue(); @@ -9174,16 +9158,35 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( "reduction recipe must be defined before latch"); FastMathFlags FMFs = RdxDesc.getFastMathFlags(); Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType(); - auto *Select = + Result = PhiTy->isFloatingPointTy() ? 
new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs) : new VPInstruction(Instruction::Select, {Cond, Red, PhiR}); - Select->insertBefore(&*Builder.getInsertPoint()); + Result->insertBefore(&*Builder.getInsertPoint()); if (PreferPredicatedReductionSelect || TTI.preferPredicatedReductionSelect( PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy, TargetTransformInfo::ReductionFlags())) - PhiR->setOperand(1, Select); + PhiR->setOperand(1, Result->getVPSingleValue()); + } + // If the vector reduction can be performed in a smaller type, we truncate + // then extend the loop exit value to enable InstCombine to evaluate the + // entire expression in the smaller type. + Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType(); + if (PhiTy != RdxDesc.getRecurrenceType()) { + assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); + Type *RdxTy = RdxDesc.getRecurrenceType(); + auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc, + Result->getVPSingleValue(), RdxTy); + auto *Extnd = + RdxDesc.isSigned() + ? 
new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy) + : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy); + + Trunc->insertAfter(Result); + Extnd->insertAfter(Trunc); + Result->getVPSingleValue()->replaceAllUsesWith(Extnd); + Trunc->setOperand(0, Result->getVPSingleValue()); } } diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index 7a3c7d6fbfea7..03903d80cfd6e 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -207,10 +207,10 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i16> ; CHECK-NEXT: [[TMP8]] = zext <4 x i16> [[TMP7]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16> @@ -234,10 +234,10 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i16>, ptr [[TMP16]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = zext <4 x i16> [[WIDE_LOAD4]] to <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP14]], [[TMP17]] -; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256 ; CHECK-NEXT: [[TMP20:%.*]] = trunc <4 x i32> [[TMP18]] to <4 x i16> ; CHECK-NEXT: 
[[TMP21]] = zext <4 x i16> [[TMP20]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 4 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256 ; CHECK-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i32> [[TMP21]] to <4 x i16> diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 837d663f4a926..a4a075463b1b0 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -22,10 +22,10 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8> ; CHECK-NEXT: [[TMP5]] = zext <4 x i8> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8> @@ -99,10 +99,10 @@ define i32 @PR35734(i32 %x, i32 %y) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: 
[[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1> ; CHECK-NEXT: [[TMP7]] = sext <4 x i1> [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1> diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index 3cc6e5fa7b8d5..afe16c71f7f9c 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -17,14 +17,14 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP27:%.*]] = zext [[WIDE_LOAD2]] to ; CHECK-NEXT: [[TMP28:%.*]] = add [[TMP14]], [[TMP26]] ; CHECK-NEXT: [[TMP29:%.*]] = add [[TMP15]], [[TMP27]] +; CHECK-NEXT: [[TMP33:%.*]] = trunc [[TMP28]] to +; CHECK-NEXT: [[TMP35:%.*]] = trunc [[TMP29]] to +; CHECK-NEXT: [[TMP34]] = zext [[TMP33]] to +; CHECK-NEXT: [[TMP36]] = zext [[TMP35]] to ; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 16 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]] ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}} -; CHECK-NEXT: [[TMP33:%.*]] = trunc [[TMP28]] to -; CHECK-NEXT: [[TMP34]] = zext [[TMP33]] to -; CHECK-NEXT: [[TMP35:%.*]] = trunc [[TMP29]] to -; CHECK-NEXT: [[TMP36]] = zext [[TMP35]] to ; CHECK: middle.block: ; CHECK-NEXT: [[TMP37:%.*]] = trunc [[TMP34]] to ; CHECK-NEXT: [[TMP38:%.*]] = trunc [[TMP36]] to From 71c97c735c10dd8040f721f93a0b7be0cc58d3ef Mon Sep 17 00:00:00 2001 From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> Date: Tue, 17 Oct 2023 11:34:06 -0700 Subject: [PATCH 371/720] =?UTF-8?q?[mlir][sparse]=20avoid=20tensor=20to=20?= 
=?UTF-8?q?memref=20conversion=20in=20sparse=20tensor=20rewri=E2=80=A6=20(?= =?UTF-8?q?#69362)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ting rules. --- .../Transforms/SparseTensorRewriting.cpp | 107 +++++-------- .../SparseTensor/convert_sparse2dense.mlir | 35 ++--- .../Dialect/SparseTensor/sparse_concat.mlir | 148 +++++++++--------- 3 files changed, 132 insertions(+), 158 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 1bfee3aa1d7ee..e50b14975e83d 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -829,47 +829,40 @@ struct ReshapeRewriter : public OpRewritePattern { } }; +// A trivial wrapper to help generate different operations for dense/sparse +// tensors. struct TensorLike { TensorLike(OpBuilder &builder, Location loc, RankedTensorType rtt, - ValueRange sizes) - : isSparse(rtt.getEncoding() != nullptr) { + ValueRange sizes) { SmallVector dynSzs; getDynamicSizes(rtt, sizes, dynSzs); - if (isSparse) - val = builder.create(loc, rtt, dynSzs); - else - val = allocDenseTensor(builder, loc, rtt, sizes); - }; - - void insertOrStore(OpBuilder &builder, Location loc, Value v, - ValueRange crds) { - if (isSparse) - val = builder.create(loc, v, val, crds); - else - builder.create(loc, v, val, crds); + val = builder.create(loc, rtt, dynSzs); + if (!isSparse()) { + Value c0 = constantZero(builder, loc, rtt.getElementType()); + val = builder.create(loc, c0, val).getResult(0); + } } - Value getSSA() const { - // We don't need to maintain the SSA chain for a memref value. - return isSparse ? val : nullptr; + void insert(OpBuilder &builder, Location loc, Value v, ValueRange crds) { + // TODO: Unify these two. 
+ if (isSparse()) + val = builder.create(loc, v, val, crds); + else + val = builder.create(loc, v, val, crds); } Value finalize(OpBuilder &builder, Location loc, RankedTensorType rtp) const { - if (isSparse) + if (isSparse()) return builder.create(loc, val, true); - return builder.create(loc, rtp, val); + return val; } - void updateSSA(Value v) { - // Dense memref is a non-SSA value. - assert(isSparse); - val = v; + bool isSparse() const { + return getSparseTensorEncoding(val.getType()) != nullptr; } -private: - bool isSparse; - Value val; // either a memref (for dense tensor) or a sparse tensor. + Value val; }; struct ConcatenateRewriter : public OpRewritePattern { @@ -901,14 +894,14 @@ struct ConcatenateRewriter : public OpRewritePattern { TensorLike dstBuf(rewriter, loc, dstTp.getRankedTensorType(), sizes); Value offset = constantIndex(rewriter, loc, 0); - Value iterArg = dstBuf.getSSA(); + Value iterArg = dstBuf.val; ForeachOp foreachOp; for (Value input : op.getInputs()) { // Builds a for op for each input tensor to append new values into the // output tensor. foreachOp = rewriter.create( - loc, input, iterArg ? ValueRange{iterArg} : ValueRange{}, + loc, input, iterArg, [&](OpBuilder &builder, Location loc, ValueRange dcvs, Value v, ValueRange reduc) { SmallVector dstLcvs(dstTp.getLvlRank()); @@ -920,32 +913,26 @@ struct ConcatenateRewriter : public OpRewritePattern { // FIXME: `toStoredDim` is deprecated dstLcvs[toStoredDim(dstTp.getEncoding(), d)] = crd; } - - if (!reduc.empty()) - dstBuf.updateSSA(reduc.front()); - + // Enters foreach, updates the SSA chain. 
+ dstBuf.val = reduc.front(); if (!dstTp.isAllDense()) { Value cond = genIsNonzero(builder, loc, v); auto ifOp = builder.create(loc, reduc.getTypes(), cond, /*else*/ true); builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - dstBuf.insertOrStore(builder, loc, v, dstLcvs); - builder.create(loc, dstBuf.getSSA()); + dstBuf.insert(builder, loc, v, dstLcvs); + builder.create(loc, dstBuf.val); // Exits the ifOp, update the sparse tensor SSA value. builder.setInsertionPointAfter(ifOp); - assert(!reduc.empty()); - dstBuf.updateSSA(ifOp.getResult(0)); + dstBuf.val = ifOp.getResult(0); } else { - dstBuf.insertOrStore(builder, loc, v, dstLcvs); + dstBuf.insert(builder, loc, v, dstLcvs); } - if (reduc.empty()) - builder.create(loc); - else - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); }); // Accumulates the offset. Note that only static-shaped inputs are allowed // by concatenate op verifier, which saves us from computing the offset @@ -955,15 +942,11 @@ struct ConcatenateRewriter : public OpRewritePattern { offset = rewriter.create( loc, offset, constantIndex(rewriter, loc, *sh)); - if (!foreachOp.getResults().empty()) { - iterArg = foreachOp.getResult(0); - dstBuf.updateSSA(iterArg); - } + iterArg = foreachOp.getResult(0); + dstBuf.val = iterArg; } - if (!foreachOp.getResults().empty()) - dstBuf.updateSSA(iterArg); - + dstBuf.val = iterArg; Value ret = dstBuf.finalize(rewriter, loc, dstTp.getRankedTensorType()); rewriter.replaceOp(op, ret); return success(); @@ -1010,15 +993,12 @@ struct DirectConvertRewriter : public OpRewritePattern { ValueRange vs; TensorLike dstBuf(rewriter, loc, dstStt.getRankedTensorType(), sizes); - Value iterArg = dstBuf.getSSA(); auto foreachOp = rewriter.create( - loc, src, iterArg ? 
ValueRange{iterArg} : ValueRange{}, foreachOrder, + loc, src, dstBuf.val, foreachOrder, [&](OpBuilder &builder, Location loc, ValueRange dcvs, Value v, ValueRange reduc) { // Enters the loop, update the SSA value for insertion chain. - if (!reduc.empty()) - dstBuf.updateSSA(reduc.front()); - + dstBuf.val = reduc.front(); const Dimension dimRank = dstStt.getDimRank(); const Level lvlRank = dstStt.getLvlRank(); SmallVector lcvs(lvlRank); @@ -1028,34 +1008,29 @@ struct DirectConvertRewriter : public OpRewritePattern { } if (!skipZeroCheck) { - assert(!reduc.empty()); Value cond = genIsNonzero(builder, loc, v); auto ifOp = builder.create(loc, reduc.getTypes(), cond, /*else*/ true); builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - dstBuf.insertOrStore(builder, loc, v, lcvs); - builder.create(loc, dstBuf.getSSA()); + dstBuf.insert(builder, loc, v, lcvs); + builder.create(loc, dstBuf.val); // Exits the ifOp, update the sparse tensor SSA value. builder.setInsertionPointAfter(ifOp); - dstBuf.updateSSA(ifOp.getResult(0)); + dstBuf.val = ifOp.getResult(0); } else { - dstBuf.insertOrStore(builder, loc, v, lcvs); + dstBuf.insert(builder, loc, v, lcvs); } - if (reduc.empty()) - builder.create(loc); - else - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); }); rewriter.setInsertionPointAfter(foreachOp); // Exits the for loop, links the SSA chain. 
- if (!foreachOp.getResults().empty()) - dstBuf.updateSSA(foreachOp.getResult(0)); + dstBuf.val = foreachOp.getResult(0); Value ret = dstBuf.finalize(rewriter, loc, dstStt.getRankedTensorType()); rewriter.replaceOp(op, ret); diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir index c22f051a0d585..e2dcb068e1185 100644 --- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir @@ -14,11 +14,10 @@ // CHECK-LABEL: func.func @sparse_convert_1d // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13xi32> { %0 = sparse_tensor.convert %arg0 : tensor<13xi32, #SparseVector> to tensor<13xi32> return %0 : tensor<13xi32> @@ -26,11 +25,10 @@ func.func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13x // CHECK-LABEL: func.func @sparse_convert_1d_dyn // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_1d_dyn(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor @@ -38,11 +36,10 @@ func.func @sparse_convert_1d_dyn(%arg0: tensor) -> tensor< // CHECK-LABEL: func.func @sparse_convert_2d // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x4xf64> { 
%0 = sparse_tensor.convert %arg0 : tensor<2x4xf64, #SparseMatrix> to tensor<2x4xf64> return %0 : tensor<2x4xf64> @@ -50,11 +47,10 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x // CHECK-LABEL: func.func @sparse_convert_2d_dyn // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d_dyn0(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor @@ -62,11 +58,10 @@ func.func @sparse_convert_2d_dyn0(%arg0: tensor) -> tens // CHECK-LABEL: func.func @sparse_convert_2d_dyn1 // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tensor<2x?xf64> { %0 = sparse_tensor.convert %arg0 : tensor<2x?xf64, #SparseMatrix> to tensor<2x?xf64> return %0 : tensor<2x?xf64> @@ -74,11 +69,10 @@ func.func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tens // CHECK-LABEL: func.func @sparse_convert_2d_dyn2 // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d_dyn2(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor @@ -86,11 +80,10 @@ func.func @sparse_convert_2d_dyn2(%arg0: tensor) -> tens // CHECK-LABEL: func.func @sparse_convert_3d // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: 
sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_3d(%arg0: tensor<2x3x4xf64, #SparseTensor>) -> tensor<2x3x4xf64> { %0 = sparse_tensor.convert %arg0 : tensor<2x3x4xf64, #SparseTensor> to tensor<2x3x4xf64> return %0 : tensor<2x3x4xf64> diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir index bdfab54dc6dae..f3d3dd28563e8 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -176,77 +176,83 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>, return %0 : tensor } -// CHECK-LABEL: @concat_sparse_sparse_dense( -// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor -// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index -// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[TMP_c9:.*]] = arith.constant 9 : index -// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[TMP_d0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK: %[[A:.*]] = memref.alloc(%[[TMP_c9]], %[[TMP_c4]]) : memref -// CHECK: linalg.fill ins(%[[TMP_d0]] : f64) outs(%[[A]] : memref) -// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_5:.*]] = 
sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_23]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] 
= memref.load %[[TMP_10]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_29]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref -// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref -// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_29]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: 
%[[R:.*]] = bufferization.to_tensor %[[A]] : memref -// CHECK: return %[[R]] : tensor +// CHECK-LABEL: func.func @concat_sparse_sparse_dense( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64, #sparse_tensor +// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x4xf64, #sparse_tensor +// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse_tensor +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 9 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_10:.*]] = bufferization.alloc_tensor(%[[VAL_4]], %[[VAL_3]]) : tensor +// CHECK: %[[VAL_11:.*]] = linalg.fill ins(%[[VAL_6]] : f64) outs(%[[VAL_10]] : tensor) -> tensor +// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_8]] iter_args(%[[VAL_21:.*]] = %[[VAL_11]]) -> (tensor) { +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_20]], 
%[[VAL_8]] : index +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_26:.*]] = scf.for %[[VAL_27:.*]] = %[[VAL_23]] to %[[VAL_25]] step %[[VAL_8]] iter_args(%[[VAL_28:.*]] = %[[VAL_21]]) -> (tensor) { +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_31:.*]] = tensor.insert %[[VAL_30]] into %[[VAL_28]]{{\[}}%[[VAL_22]], %[[VAL_29]]] : tensor +// CHECK: scf.yield %[[VAL_31]] : tensor +// CHECK: } +// CHECK: scf.yield %[[VAL_26]] : tensor +// CHECK: } +// CHECK: %[[VAL_32:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_33:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_34:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_35:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_36:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_39:.*]] = scf.for %[[VAL_40:.*]] = %[[VAL_37]] to %[[VAL_38]] step %[[VAL_8]] iter_args(%[[VAL_41:.*]] = %[[VAL_19]]) -> (tensor) { +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_33]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_34]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_40]], %[[VAL_8]] : index +// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_34]]{{\[}}%[[VAL_44]]] : memref +// CHECK: %[[VAL_46:.*]] = scf.for %[[VAL_47:.*]] = %[[VAL_43]] to %[[VAL_45]] step %[[VAL_8]] iter_args(%[[VAL_48:.*]] = %[[VAL_41]]) -> (tensor) { +// CHECK: %[[VAL_49:.*]] = memref.load 
%[[VAL_35]]{{\[}}%[[VAL_47]]] : memref +// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_36]]{{\[}}%[[VAL_47]]] : memref +// CHECK: %[[VAL_51:.*]] = arith.addi %[[VAL_42]], %[[VAL_9]] : index +// CHECK: %[[VAL_52:.*]] = tensor.insert %[[VAL_50]] into %[[VAL_48]]{{\[}}%[[VAL_51]], %[[VAL_49]]] : tensor +// CHECK: scf.yield %[[VAL_52]] : tensor +// CHECK: } +// CHECK: scf.yield %[[VAL_46]] : tensor +// CHECK: } +// CHECK: %[[VAL_53:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_54:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_55:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_56:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_57:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_59:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_60:.*]] = scf.for %[[VAL_61:.*]] = %[[VAL_58]] to %[[VAL_59]] step %[[VAL_8]] iter_args(%[[VAL_62:.*]] = %[[VAL_39]]) -> (tensor) { +// CHECK: %[[VAL_63:.*]] = memref.load %[[VAL_54]]{{\[}}%[[VAL_61]]] : memref +// CHECK: %[[VAL_64:.*]] = memref.load %[[VAL_55]]{{\[}}%[[VAL_61]]] : memref +// CHECK: %[[VAL_65:.*]] = arith.addi %[[VAL_61]], %[[VAL_8]] : index +// CHECK: %[[VAL_66:.*]] = memref.load %[[VAL_55]]{{\[}}%[[VAL_65]]] : memref +// CHECK: %[[VAL_67:.*]] = scf.for %[[VAL_68:.*]] = %[[VAL_64]] to %[[VAL_66]] step %[[VAL_8]] iter_args(%[[VAL_69:.*]] = %[[VAL_62]]) -> (tensor) { +// CHECK: %[[VAL_70:.*]] = memref.load %[[VAL_56]]{{\[}}%[[VAL_68]]] : memref +// CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_57]]{{\[}}%[[VAL_68]]] : memref +// CHECK: %[[VAL_72:.*]] = arith.addi %[[VAL_63]], %[[VAL_5]] : index +// CHECK: %[[VAL_73:.*]] = 
tensor.insert %[[VAL_71]] into %[[VAL_69]]{{\[}}%[[VAL_72]], %[[VAL_70]]] : tensor +// CHECK: scf.yield %[[VAL_73]] : tensor +// CHECK: } +// CHECK: scf.yield %[[VAL_67]] : tensor +// CHECK: } +// CHECK: return %[[VAL_60]] : tensor +// CHECK: } func.func @concat_sparse_sparse_dense(%arg0: tensor<2x4xf64, #DCSR>, %arg1: tensor<3x4xf64, #DCSR>, %arg2: tensor<4x4xf64, #DCSR>) From 31512811b8c0f8fd328fba585640992c39218f1e Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 17 Oct 2023 20:46:01 +0200 Subject: [PATCH 372/720] [clang-tidy] Add check to diagnose coroutine-hostile RAII objects (#68738) This check detects **hostile-RAII** objects which should not **persist across a suspension point in a coroutine**. Some objects require that they be destroyed on the same thread that created them. Traditionally this requirement was often phrased as "must be a local variable", under the assumption that local variables always work this way. However this is incorrect with **C++20 coroutines**, since an intervening `co_await` may cause the coroutine to suspend and later be resumed on another thread. The lifetime of an object that requires being destroyed on the same thread must not encompass a `co_await` or `co_yield` point. If you create/destroy an object, you must do so without allowing the coroutine to suspend in the meantime. The check considers the following type as hostile: - **Scoped-lockable types**: A scoped-lockable object persisting across a suspension point is problematic as the lock held by this object could be unlocked by a different thread. This would be undefined behaviour. - Types belonging to a configurable **denylist**. ```cpp // Call some async API while holding a lock. const my::MutexLock l(&mu_); // Oops! The async Bar function may finish on a different // thread from the one that created the MutexLock object and therefore called // Mutex::Lock -- now Mutex::Unlock will be called on the wrong thread. 
co_await Bar(); ``` --- .../clang-tidy/misc/CMakeLists.txt | 1 + .../misc/CoroutineHostileRAIICheck.cpp | 98 +++++++++ .../misc/CoroutineHostileRAIICheck.h | 50 +++++ .../clang-tidy/misc/MiscTidyModule.cpp | 3 + clang-tools-extra/docs/ReleaseNotes.rst | 7 + .../docs/clang-tidy/checks/list.rst | 1 + .../checks/misc/coroutine-hostile-raii.rst | 50 +++++ .../checkers/misc/coroutine-hostile-raii.cpp | 192 ++++++++++++++++++ 8 files changed, 402 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp create mode 100644 clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp diff --git a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt index 2e88e68a54478..d9ec268650c05 100644 --- a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt @@ -18,6 +18,7 @@ add_custom_target(genconfusable DEPENDS Confusables.inc) add_clang_library(clangTidyMiscModule ConstCorrectnessCheck.cpp + CoroutineHostileRAIICheck.cpp DefinitionsInHeadersCheck.cpp ConfusableIdentifierCheck.cpp HeaderIncludeCycleCheck.cpp diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp new file mode 100644 index 0000000000000..e820cd39d83d2 --- /dev/null +++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp @@ -0,0 +1,98 @@ +//===--- CoroutineHostileRAII.cpp - clang-tidy ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CoroutineHostileRAIICheck.h" +#include "../utils/OptionsUtils.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchersInternal.h" +#include "clang/Basic/AttrKinds.h" +#include "clang/Basic/DiagnosticIDs.h" + +using namespace clang::ast_matchers; +namespace clang::tidy::misc { +namespace { +using clang::ast_matchers::internal::BoundNodesTreeBuilder; + +AST_MATCHER_P(Stmt, forEachPrevStmt, ast_matchers::internal::Matcher, + InnerMatcher) { + DynTypedNode P; + bool IsHostile = false; + for (const Stmt *Child = &Node; Child; Child = P.get()) { + auto Parents = Finder->getASTContext().getParents(*Child); + if (Parents.empty()) + break; + P = *Parents.begin(); + auto *PCS = P.get(); + if (!PCS) + continue; + for (const auto &Sibling : PCS->children()) { + // Child contains suspension. Siblings after Child do not persist across + // this suspension. + if (Sibling == Child) + break; + // In case of a match, add the bindings as a separate match. Also don't + // clear the bindings if a match is not found (unlike Matcher::matches). 
+ BoundNodesTreeBuilder SiblingBuilder; + if (InnerMatcher.matches(*Sibling, Finder, &SiblingBuilder)) { + Builder->addMatch(SiblingBuilder); + IsHostile = true; + } + } + } + return IsHostile; +} +} // namespace + +CoroutineHostileRAIICheck::CoroutineHostileRAIICheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + RAIITypesList(utils::options::parseStringList( + Options.get("RAIITypesList", "std::lock_guard;std::scoped_lock"))) {} + +void CoroutineHostileRAIICheck::registerMatchers(MatchFinder *Finder) { + // A suspension happens with co_await or co_yield. + auto ScopedLockable = varDecl(hasType(hasCanonicalType(hasDeclaration( + hasAttr(attr::Kind::ScopedLockable))))) + .bind("scoped-lockable"); + auto OtherRAII = varDecl(hasType(hasCanonicalType(hasDeclaration( + namedDecl(hasAnyName(RAIITypesList)))))) + .bind("raii"); + Finder->addMatcher(expr(anyOf(coawaitExpr(), coyieldExpr()), + forEachPrevStmt(declStmt(forEach( + varDecl(anyOf(ScopedLockable, OtherRAII)))))) + .bind("suspension"), + this); +} + +void CoroutineHostileRAIICheck::check(const MatchFinder::MatchResult &Result) { + if (const auto *VD = Result.Nodes.getNodeAs("scoped-lockable")) + diag(VD->getLocation(), + "%0 holds a lock across a suspension point of coroutine and could be " + "unlocked by a different thread") + << VD; + if (const auto *VD = Result.Nodes.getNodeAs("raii")) + diag(VD->getLocation(), + "%0 persists across a suspension point of coroutine") + << VD; + if (const auto *Suspension = Result.Nodes.getNodeAs("suspension")) + diag(Suspension->getBeginLoc(), "suspension point is here", + DiagnosticIDs::Note); +} + +void CoroutineHostileRAIICheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "RAIITypesList", + utils::options::serializeStringList(RAIITypesList)); +} +} // namespace clang::tidy::misc diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h 
b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h new file mode 100644 index 0000000000000..a5e9cb89ef676 --- /dev/null +++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h @@ -0,0 +1,50 @@ +//===--- CoroutineHostileRAIICheck.h - clang-tidy ----------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H + +#include "../ClangTidyCheck.h" +#include "clang/AST/ASTTypeTraits.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace clang::tidy::misc { + +/// Detects when objects of certain hostile RAII types persist across +/// suspension points in a coroutine. Such hostile types include scoped-lockable +/// types and types belonging to a configurable denylist. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/misc/coroutine-hostile-raii.html
class CoroutineHostileRAIICheck : public ClangTidyCheck { +public: + CoroutineHostileRAIICheck(llvm::StringRef Name, ClangTidyContext *Context); + + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus20; + } + + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + + std::optional getCheckTraversalKind() const override { + return TK_AsIs; + } + +private: + // List of fully qualified types which should not persist across a suspension + // point in a coroutine. 
+ std::vector RAIITypesList; +}; + +} // namespace clang::tidy::misc + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H diff --git a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp index 92590506e1ec1..d8a88324ee63e 100644 --- a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp @@ -11,6 +11,7 @@ #include "../ClangTidyModuleRegistry.h" #include "ConfusableIdentifierCheck.h" #include "ConstCorrectnessCheck.h" +#include "CoroutineHostileRAIICheck.h" #include "DefinitionsInHeadersCheck.h" #include "HeaderIncludeCycleCheck.h" #include "IncludeCleanerCheck.h" @@ -41,6 +42,8 @@ class MiscModule : public ClangTidyModule { "misc-confusable-identifiers"); CheckFactories.registerCheck( "misc-const-correctness"); + CheckFactories.registerCheck( + "misc-coroutine-hostile-raii"); CheckFactories.registerCheck( "misc-definitions-in-headers"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index af164d0462d52..3e1fbe091c9ff 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -163,6 +163,13 @@ New checks Flags coroutines that suspend while a lock guard is in scope at the suspension point. +- New :doc:`misc-coroutine-hostile-raii + ` check. + + Detects when objects of certain hostile RAII types persist across suspension + points in a coroutine. Such hostile types include scoped-lockable types and + types belonging to a configurable denylist. + - New :doc:`modernize-use-constraints ` check. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 2125ebd7a213c..819e3974e3f13 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -241,6 +241,7 @@ Clang-Tidy Checks :doc:`llvmlibc-restrict-system-libc-headers `, "Yes" :doc:`misc-confusable-identifiers `, :doc:`misc-const-correctness `, "Yes" + :doc:`misc-coroutine-hostile-raii `, :doc:`misc-definitions-in-headers `, "Yes" :doc:`misc-header-include-cycle `, :doc:`misc-include-cleaner `, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst new file mode 100644 index 0000000000000..dcb9f399774cb --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst @@ -0,0 +1,50 @@ +.. title:: clang-tidy - misc-coroutine-hostile-raii + +misc-coroutine-hostile-raii +=========================== + +Detects when objects of certain hostile RAII types persist across suspension +points in a coroutine. Such hostile types include scoped-lockable types and +types belonging to a configurable denylist. + +Some objects require that they be destroyed on the same thread that created them. +Traditionally this requirement was often phrased as "must be a local variable", +under the assumption that local variables always work this way. However this is +incorrect with C++20 coroutines, since an intervening ``co_await`` may cause the +coroutine to suspend and later be resumed on another thread. + +The lifetime of an object that requires being destroyed on the same thread must +not encompass a ``co_await`` or ``co_yield`` point. If you create/destroy an object, +you must do so without allowing the coroutine to suspend in the meantime. 
+ +Following types are considered as hostile: + + - Scoped-lockable types: A scoped-lockable object persisting across a suspension + point is problematic as the lock held by this object could be unlocked by a + different thread. This would be undefined behaviour. + This includes all types annotated with the ``scoped_lockable`` attribute. + + - Types belonging to a configurable denylist. + +.. code-block:: c++ + + // Call some async API while holding a lock. + { + const my::MutexLock l(&mu_); + + // Oops! The async Bar function may finish on a different + // thread from the one that created the MutexLock object and therefore called + // Mutex::Lock -- now Mutex::Unlock will be called on the wrong thread. + co_await Bar(); + } + + +Options +------- + +.. option:: RAIITypesList + + A semicolon-separated list of qualified types which should not be allowed to + persist across suspension points. + Eg: ``my::lockable; a::b;::my::other::lockable;`` + The default value of this option is `"std::lock_guard;std::scoped_lock"`. 
\ No newline at end of file diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp new file mode 100644 index 0000000000000..2d022e21c85d5 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp @@ -0,0 +1,192 @@ +// RUN: %check_clang_tidy -std=c++20 %s misc-coroutine-hostile-raii %t \ +// RUN: -config="{CheckOptions: \ +// RUN: {misc-coroutine-hostile-raii.RAIITypesList: \ +// RUN: 'my::Mutex; ::my::other::Mutex'}}" + +namespace std { + +template struct coroutine_traits { + using promise_type = typename R::promise_type; +}; + +template struct coroutine_handle; + +template <> struct coroutine_handle { + static coroutine_handle from_address(void *addr) noexcept { + coroutine_handle me; + me.ptr = addr; + return me; + } + void operator()() { resume(); } + void *address() const noexcept { return ptr; } + void resume() const { } + void destroy() const { } + bool done() const { return true; } + coroutine_handle &operator=(decltype(nullptr)) { + ptr = nullptr; + return *this; + } + coroutine_handle(decltype(nullptr)) : ptr(nullptr) {} + coroutine_handle() : ptr(nullptr) {} + // void reset() { ptr = nullptr; } // add to P0057? 
+ explicit operator bool() const { return ptr; } + +protected: + void *ptr; +}; + +template struct coroutine_handle : coroutine_handle<> { + using coroutine_handle<>::operator=; + + static coroutine_handle from_address(void *addr) noexcept { + coroutine_handle me; + me.ptr = addr; + return me; + } + + Promise &promise() const { + return *reinterpret_cast( + __builtin_coro_promise(ptr, alignof(Promise), false)); + } + static coroutine_handle from_promise(Promise &promise) { + coroutine_handle p; + p.ptr = __builtin_coro_promise(&promise, alignof(Promise), true); + return p; + } +}; + +struct suspend_always { + bool await_ready() noexcept { return false; } + void await_suspend(std::coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; +} // namespace std + +struct ReturnObject { + struct promise_type { + ReturnObject get_return_object() { return {}; } + std::suspend_always initial_suspend() { return {}; } + std::suspend_always final_suspend() noexcept { return {}; } + void unhandled_exception() {} + std::suspend_always yield_value(int value) { return {}; } + }; +}; + +#define SCOPED_LOCKABLE __attribute__ ((scoped_lockable)) + +namespace absl { +class SCOPED_LOCKABLE Mutex {}; +using Mutex2 = Mutex; +} // namespace absl + +ReturnObject BasicWarning() { + absl::Mutex mtx; + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 'mtx' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + int no_warning; + { + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + } +} + +ReturnObject BasicNoWarning() { + co_yield 1; + { absl::Mutex no_warning; } + int no_warning; + { + co_yield 1; + absl::Mutex no_warning; + } + co_yield 1; +} + +ReturnObject scopedLockableTest() { + co_yield 0; + absl::Mutex a; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'a' holds a lock across a suspension point of coroutine and could be unlocked by a different thread 
[misc-coroutine-hostile-raii] + absl::Mutex2 b; + // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: 'b' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + { + absl::Mutex no_warning_1; + { absl::Mutex no_warning_2; } + } + + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + absl::Mutex c; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'c' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_await std::suspend_always{}; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + for(int i=1; i<=10; ++i ) { + absl::Mutex d; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 'd' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_await std::suspend_always{}; + // CHECK-MESSAGES: :[[@LINE-1]]:7: note: suspension point is here + co_yield 1; + absl::Mutex no_warning_3; + } + if (true) { + absl::Mutex e; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 'e' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:7: note: suspension point is here + absl::Mutex no_warning_4; + } + absl::Mutex no_warning_5; +} + +void lambda() { + absl::Mutex no_warning; + auto lambda = []() -> ReturnObject { + co_await std::suspend_always{}; + absl::Mutex a; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'a' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + co_await std::suspend_always{}; + co_yield 1; + }; + absl::Mutex no_warning_2; +} + +template +ReturnObject raii_in_template(){ + T a; + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'a' holds a lock across 
a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:3: note: suspension point is here +} +void foo_template() { raii_in_template(); } + +namespace my { +class Mutex{}; +namespace other { +class Mutex{}; +} // namespace other + +using Mutex2 = Mutex; +} // namespace my + +ReturnObject denyListTest() { + my::Mutex a; + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 'a' persists across a suspension point of coroutine [misc-coroutine-hostile-raii] + my::other::Mutex b; + // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: 'b' persists across a suspension point of coroutine [misc-coroutine-hostile-raii] + my::Mutex2 c; + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: 'c' persists across a suspension point of coroutine [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here +} + +ReturnObject referenceTest(my::Mutex& ref) { + my::Mutex& a = ref; + co_yield 1; +} +ReturnObject pointerTest(my::Mutex* ref) { + my::Mutex* a = ref; + co_yield 1; +} + +ReturnObject functionArgTest(my::Mutex ref) { + co_yield 1; +} From e6d0b126c824222fca2f31a2ba571c2ee2bb4760 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 17 Oct 2023 20:53:42 +0200 Subject: [PATCH 373/720] Correctly compute conversion seq for args to fn with reversed param order (#68999) We associated conversion seq for args (when reversed) to the wrong index. This lead to clang believing reversed `operator==` a worse overload candidate than the `operator==` without reversed args when both these candidate were ambiguous. 
Fixes https://github.com/llvm/llvm-project/issues/53954 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/Sema/SemaOverload.cpp | 2 +- .../over.match.oper/p3-2a.cpp | 35 +++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 81cbfd90155fe..443325bb0d1e1 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -117,6 +117,8 @@ C++ Language Changes C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ +- Fix a bug in conversion sequence of arguments to a function with reversed parameter order. + Fixes `GH `_. C++23 Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index ce78994e65538..c271cebb9eb63 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7688,7 +7688,7 @@ bool Sema::CheckNonDependentConversions( QualType ParamType = ParamTypes[I + Offset]; if (!ParamType->isDependentType()) { unsigned ConvIdx = PO == OverloadCandidateParamOrder::Reversed - ? 0 + ? 
Args.size() - 1 - (ThisConversions + I) : (ThisConversions + I); Conversions[ConvIdx] = TryCopyInitialization(*this, Args[I], ParamType, diff --git a/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp b/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp index 5c6804eb7726b..02fe37dc1be50 100644 --- a/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp +++ b/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp @@ -324,6 +324,41 @@ bool x = X() == X(); // expected-warning {{ambiguous}} } } // namespace P2468R2 +namespace GH53954{ +namespace test1 { +struct P { + template + friend bool operator==(const P&, const T&); // expected-note {{candidate}} \ + // expected-note {{reversed parameter order}} +}; +struct A : public P {}; +struct B : public P {}; +bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}} +} + +namespace test2 { +struct P { + template + friend bool operator==(const T&, const P&); // expected-note {{candidate}} \ + // expected-note {{reversed parameter order}} +}; +struct A : public P {}; +struct B : public P {}; +bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}} +} + +namespace test3 { +struct P { + template + bool operator==(const S &) const; // expected-note {{candidate}} \ + // expected-note {{reversed parameter order}} +}; +struct A : public P {}; +struct B : public P {}; +bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}} +} +} + #else // NO_ERRORS namespace problem_cases { From fbf0a77e80f18a6d0fd8a28833b0bc87a99b1b2f Mon Sep 17 00:00:00 2001 From: Bill Wendling <5993918+bwendling@users.noreply.github.com> Date: Tue, 17 Oct 2023 12:03:26 -0700 Subject: [PATCH 374/720] [CodeGen] Avoid potential sideeffects from XOR (#67193) XOR may change flag values (e.g. for X86 gprs). In the case where that's not desirable, specify that buildClearRegister() should use MOV instead. 
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 7 +++-- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 6 ++-- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 4 +-- llvm/lib/Target/X86/X86InstrInfo.cpp | 33 ++++++++++++++------ llvm/lib/Target/X86/X86InstrInfo.h | 4 +-- 5 files changed, 36 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 14e27abe882b0..6c3e02b2f5940 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2093,10 +2093,13 @@ class TargetInstrInfo : public MCInstrInfo { "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); } - /// Insert an architecture-specific instruction to clear a register. + /// Insert an architecture-specific instruction to clear a register. If you + /// need to avoid sideeffects (e.g. avoid XOR on x86, which sets EFLAGS), set + /// \p AllowSideEffects to \p false. virtual void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, - DebugLoc &DL) const { + DebugLoc &DL, + bool AllowSideEffects = true) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::buildClearRegister!"); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 05c79b610cb36..7dcf24c26e124 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -9134,13 +9134,15 @@ bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault( void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, - DebugLoc &DL) const { + DebugLoc &DL, + bool AllowSideEffects) const { const MachineFunction &MF = *MBB.getParent(); const AArch64Subtarget &STI = MF.getSubtarget(); const AArch64RegisterInfo &TRI = *STI.getRegisterInfo(); if (TRI.isGeneralPurposeRegister(MF, Reg)) { - BuildMI(MBB, Iter, DL, 
get(AArch64::MOVi64imm), Reg) + BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg) + .addImm(0) .addImm(0); } else if (STI.hasSVE()) { BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 4a40b2fa12215..a934103c90cbf 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -333,8 +333,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; void buildClearRegister(Register Reg, MachineBasicBlock &MBB, - MachineBasicBlock::iterator Iter, - DebugLoc &DL) const override; + MachineBasicBlock::iterator Iter, DebugLoc &DL, + bool AllowSideEffects = true) const override; /// Returns the vector element size (B, H, S or D) of an SVE opcode. uint64_t getElementSizeForOpcode(unsigned Opc) const; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index f0c46419ab351..4c6854da0ada3 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -10130,27 +10130,36 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, return It; } -void X86InstrInfo::buildClearRegister(Register Reg, - MachineBasicBlock &MBB, +void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, - DebugLoc &DL) const { + DebugLoc &DL, + bool AllowSideEffects) const { const MachineFunction &MF = *MBB.getParent(); const X86Subtarget &ST = MF.getSubtarget(); const TargetRegisterInfo &TRI = getRegisterInfo(); if (ST.hasMMX() && X86::VR64RegClass.contains(Reg)) - // FIXME: Ignore MMX registers? + // FIXME: Should we ignore MMX registers? 
return; if (TRI.isGeneralPurposeRegister(MF, Reg)) { - BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg) - .addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + // Convert register to the 32-bit version. Both 'movl' and 'xorl' clear the + // upper bits of a 64-bit register automagically. + Reg = getX86SubSuperRegister(Reg, 32); + + if (!AllowSideEffects) + // XOR affects flags, so use a MOV instead. + BuildMI(MBB, Iter, DL, get(X86::MOV32ri), Reg).addImm(0); + else + BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); } else if (X86::VR128RegClass.contains(Reg)) { // XMM# if (!ST.hasSSE1()) return; + // PXOR is safe to use because it doesn't affect flags. BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -10159,6 +10168,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, if (!ST.hasAVX()) return; + // VPXOR is safe to use because it doesn't affect flags. BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -10167,6 +10177,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, if (!ST.hasAVX512()) return; + // VPXORY is safe to use because it doesn't affect flags. BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -10178,9 +10189,11 @@ void X86InstrInfo::buildClearRegister(Register Reg, if (!ST.hasVLX()) return; - BuildMI(MBB, Iter, DL, get(ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr), Reg) - .addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + // KXOR is safe to use because it doesn't affect flags. + unsigned Op = ST.hasBWI() ? 
X86::KXORQrr : X86::KXORWrr; + BuildMI(MBB, Iter, DL, get(Op), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 4d261a803421c..e1199e20c318e 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -583,8 +583,8 @@ class X86InstrInfo final : public X86GenInstrInfo { outliner::Candidate &C) const override; void buildClearRegister(Register Reg, MachineBasicBlock &MBB, - MachineBasicBlock::iterator Iter, - DebugLoc &DL) const override; + MachineBasicBlock::iterator Iter, DebugLoc &DL, + bool AllowSideEffects = true) const override; bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; From ab91e05e48d9ea47b60858dc259bdbf00dfde7fa Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 17 Oct 2023 12:16:45 -0700 Subject: [PATCH 375/720] [mlgo] Fix tests post 760e7d0 --- .../MLRegAlloc/Inputs/reference-log-noml.txt | 72 +++++++++---------- .../Inputs/reference-prio-log-noml.txt | 12 ++-- .../MLRegAlloc/dev-mode-prio-logging.ll | 2 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt index 0c024ad2b2e1b..a5ccdde751ed5 100644 --- a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt +++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt @@ -16,8 +16,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7265065908432007,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23831403255462646,0.07943800836801529,0.07943800836801529,0.07943800836801529,0.9912577867507935,0.07069581001996994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7940031290054321,0.7908878326416016,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7352024912834167 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.01417447254061699,0.014231426641345024,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4279724359512329 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7939082384109497,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.014218696393072605,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -40,8 +40,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.0,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2404157966375351,0.08013860136270523,0.0,0.08013860136270523,1.0,0.07131929695606232,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08013860136270523 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.0,0.7908878326416016,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7940031290054321 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.0,0.014231426641345024,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01417447254061699 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.0,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7939082384109497 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.0,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014218696393072605 max_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -64,8 +64,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.01989283785223961,0.02235277369618416,0.2813863754272461,0.02235277369618416,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9061526656150818,0.9591121673583984,0.7352024912834167,0.7908878326416016,0.7379283308982849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6725077629089355 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0737093985080719,0.01772311143577099,0.4279724359512329,0.014231426641345024,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4858442544937134 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9173259735107422,0.9647942781448364,0.7436708807945251,0.7907436490058899,0.7401107549667358,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6831487417221069 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07275574654340744,0.017619721591472626,0.4243086874485016,0.014276761561632156,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.47955840826034546 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -88,8 +88,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.0,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.0,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23831403255462646,0.07943800836801529,1.0,0.0,0.9912577867507935,0.07069581001996994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07943800836801529 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.0,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7908878326416016 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.0,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014231426641345024 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.0,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7907436490058899 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.0,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014276761561632156 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -112,8 +112,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01117638684809208 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6693925261497498 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00449750293046236 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6799841523170471 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004439314361661673 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -136,8 +136,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01822916604578495 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6662772297859192 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008109557442367077 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6768196225166321 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008004635572433472 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -160,8 +160,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688586473465,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.01989283785223961,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9061526656150818,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6631619930267334 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0737093985080719,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07601386308670044 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9173259735107422,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.673655092716217 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07275574654340744,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07503040134906769 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -184,8 +184,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.2724486181679993e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.0,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9591121673583984 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01772311143577099 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9647942781448364 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.0,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017619721591472626 max_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -208,8 +208,8 @@ hint_weights_by_max: 1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853, start_bb_freq_by_max: 0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 
0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5110765099525452 hottest_bb_freq_by_max: 0.2813863754272461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.2631579041481018,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.27892643213272095 -liverange_size: 0.7352024912834167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.6631619930267334,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.46105918288230896 -use_def_density: 0.42606985569000244,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05081497132778168,0.07567594200372696,0.48368439078330994,0.9955543875694275,0.07338171452283859,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +liverange_size: 0.7436708807945251,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.673655092716217,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4683544337749481 +use_def_density: 0.4243086874485016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07503040134906769,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99146968126297 max_stage: 1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -232,8 +232,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7265065908432007, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.2631579041481018,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.6631619930267334,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7352024912834167 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.07601386308670044,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4279724359512329 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.673655092716217,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07503040134906769,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016 max_stage: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -256,8 +256,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204,0.0,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948,0.0,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018,0.0,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6631619930267334,0.0,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07601386308670044,0.0,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.673655092716217,0.0,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07503040134906769,0.0,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038 max_stage: 0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -280,8 +280,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,0.35764437913894653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.482131917196483e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.811345100402832,0.7318435907363892,0.5088096261024475,0.7421572804450989,0.8143532276153564,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2819080352783203 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,0.05657143518328667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8106942772865295,0.7343682646751404,0.510564923286438,0.744717538356781,0.8068132996559143,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2828805446624756 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,0.05657143518328667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -304,8 +304,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.5,0.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.5236390233039856,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3283064365386963e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.0,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01989283785223961 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.811345100402832,0.0,0.7318435907363892,0.5088096261024475,0.7421572804450989,0.8143532276153564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.42606985569000244,0.0,0.07567594200372696,1.0,0.48368439078330994,0.9955543875694275,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07338171452283859 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8106942772865295,0.0,0.7343682646751404,0.510564923286438,0.744717538356781,0.8068132996559143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016,0.0,0.07503040134906769,0.99146968126297,0.47955840826034546,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07275574654340744 max_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -328,8 +328,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9982500076293 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6522180438041687,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015647225081920624 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8219112157821655,0.5714285969734192,0.8334941864013672,0.9145752787590027,0.31660231947898865,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9652509689331055 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016097404062747955 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8283073902130127,0.575875461101532,0.8399805426597595,0.9100194573402405,0.3190661370754242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9688715934753418 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016164302825927734 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.2222222238779068 @@ -352,8 +352,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.9 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.778997310048936e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8797762989997864,0.7935694456100464,0.5517241358757019,0.8047530055046082,0.8830382227897644,0.30568498373031616,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008228360675275326 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8826290965080261,0.7995305061340332,0.5558685660362244,0.8107981085777283,0.8784037828445435,0.3079812228679657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00826177466660738 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.1944444477558136 @@ -376,8 +376,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9434669613838196, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,1.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,1.0,1.482131917196483e-10,0.3333333432674408,0.6365708112716675,0.6365708112716675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.778997310048936e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8342907428741455,0.2898806929588318,1.0,0.5231993198394775,0.7631462812423706,0.8373839855194092,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9253203868865967 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,1.0,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015127303078770638 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8337028622627258,0.290909081697464,1.0,0.5250554084777832,0.7658536434173584,0.8297117352485657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9250554442405701 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,1.0,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015127303078770638 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.1388888955116272 @@ -400,8 +400,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.07419288158416748 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9451456069946289,0.5747572779655457,0.8383495211601257,0.9199029207229614,0.3184466063976288,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9839805960655212 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006958406884223223 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9521695971488953,0.5838264226913452,0.8515779376029968,0.922583818435669,0.32347139716148376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9837278127670288 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007072303909808397 max_stage: 0,0,0,0,0,0,0,0,0,4,4,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.1111111119389534 @@ -424,8 +424,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.07419288158416748 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9451456069946289,0.5747572779655457,0.8383495211601257,0.9199029207229614,0.3184466063976288,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9849514365196228 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006951410323381424 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9521695971488953,0.5838264226913452,0.8515779376029968,0.922583818435669,0.32347139716148376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9847140312194824 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007065076380968094 max_stage: 0,0,0,0,0,0,0,0,0,4,4,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.0833333358168602 diff --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt index beb0c5205979c..01b4a3835c978 100644 --- a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt +++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt @@ -171,7 +171,7 @@ observation: 28 li_size: 0 stage: 
0 weight: 0.0 -priority: 2147485184.0 +priority: 2147484928.0 reward: 0.0 observation: 29 li_size: 0 @@ -237,7 +237,7 @@ observation: 39 li_size: 0 stage: 0 weight: 0.0 -priority: 3598.0 +priority: 3534.0 reward: 0.0 observation: 40 li_size: 0 @@ -249,7 +249,7 @@ observation: 41 li_size: 0 stage: 0 weight: 0.0 -priority: 3582.0 +priority: 3518.0 reward: 0.0 observation: 42 li_size: 0 @@ -273,7 +273,7 @@ observation: 45 li_size: 0 stage: 0 weight: 0.0 -priority: 4078.0 +priority: 4046.0 reward: 0.0 observation: 46 li_size: 0 @@ -291,7 +291,7 @@ observation: 48 li_size: 0 stage: 0 weight: 0.0 -priority: 4384.0 +priority: 4304.0 reward: 0.0 observation: 49 li_size: 0 @@ -309,7 +309,7 @@ observation: 51 li_size: 0 stage: 0 weight: 0.0 -priority: 2684358144.0 +priority: 2684357888.0 reward: 0.0 observation: 52 li_size: 0 diff --git a/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll b/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll index 21bb75278874a..6b013b55df77a 100644 --- a/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll +++ b/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll @@ -24,5 +24,5 @@ ; CHECK-NOT: nan ; CHECK-LABEL: priority: ; NOML-SAME: 2684358144.0 -; ML-SAME: 3599 +; ML-SAME: 3535 ; CHECK-LABEL: reward: From f781508d319438d2c6d6bb264328b018c15b6946 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 17 Oct 2023 15:21:06 -0400 Subject: [PATCH 376/720] [gn] port dd64c82cbc9c6 --- llvm/utils/gn/secondary/llvm/test/BUILD.gn | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index f859af249faf5..dd9fd0c10d53e 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -222,7 +222,10 @@ write_lit_config("lit_site_cfg") { write_lit_config("lit_unit_site_cfg") { input = "//llvm/test/Unit/lit.site.cfg.py.in" output = llvm_lit_unit_site_cfg_file - extra_values = [ "LLVM_BUILD_MODE=." 
] + extra_values = [ + "LLVM_BUILD_MODE=.", + "LLVM_GTEST_RUN_UNDER=", + ] } # This target should contain all dependencies of check-llvm. From c0f3478934bec4a585cd1ed973a0ee39e0ceb7be Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 17 Oct 2023 19:22:25 +0000 Subject: [PATCH 377/720] [gn build] Port 31512811b8c0 --- .../gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn index 8a811bc990d41..36957f502c323 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn @@ -34,6 +34,7 @@ static_library("misc") { sources = [ "ConfusableIdentifierCheck.cpp", "ConstCorrectnessCheck.cpp", + "CoroutineHostileRAIICheck.cpp", "DefinitionsInHeadersCheck.cpp", "HeaderIncludeCycleCheck.cpp", "IncludeCleanerCheck.cpp", From 7dc644fc463a8f42f54d63a99c3a4579df2c3859 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 17 Oct 2023 12:31:34 -0700 Subject: [PATCH 378/720] [CodeGen] Temporary disable the unreachable It should be there, but we need all platforms that use stack protectors to implement it first. --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 6c3e02b2f5940..8e7499ac626a7 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2100,8 +2100,12 @@ class TargetInstrInfo : public MCInstrInfo { MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects = true) const { +#if 0 + // FIXME: This should exist once all platforms that use stack protectors + // implements it. 
llvm_unreachable( "Target didn't implement TargetInstrInfo::buildClearRegister!"); +#endif } /// Return true if the function can safely be outlined from. From 389958a9f67ae35dde9c46205bb032842f0cad6a Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 17 Oct 2023 12:35:30 -0700 Subject: [PATCH 379/720] [CodeGen][NFC] Fix formatting This fixes the formatting introduced by fbf0a77e80f18a6d0fd8a28833b0bc87a99b1b2f. --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 7dcf24c26e124..7f1421549b149 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -9141,9 +9141,7 @@ void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, const AArch64RegisterInfo &TRI = *STI.getRegisterInfo(); if (TRI.isGeneralPurposeRegister(MF, Reg)) { - BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg) - .addImm(0) - .addImm(0); + BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0); } else if (STI.hasSVE()) { BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg) .addImm(0) From 0996ceece605ccba3f4c0079e0204e3c0b068d0e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 17 Oct 2023 12:49:17 -0700 Subject: [PATCH 380/720] [ELF][test] Improve relocatable link & /DISCARD/ test Check that #69295 will fix symbols referenced by relocations that are defined in discarded sections. 
--- lld/test/ELF/linkerscript/discard-section.s | 22 ++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/lld/test/ELF/linkerscript/discard-section.s b/lld/test/ELF/linkerscript/discard-section.s index 9e021ac83f563..0ede36c7351f2 100644 --- a/lld/test/ELF/linkerscript/discard-section.s +++ b/lld/test/ELF/linkerscript/discard-section.s @@ -6,7 +6,27 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64 b.s -o b.o # RUN: ld.lld -T a.lds a.o b.o -z undefs -o /dev/null 2>&1 | count 0 # RUN: ld.lld -T a.lds a.o b.o -o /dev/null 2>&1 | count 0 -# RUN: ld.lld -r -T a.lds a.o b.o -o /dev/null 2>&1 | count 0 +# RUN: ld.lld -r -T a.lds a.o b.o -o a.ro 2>&1 | count 0 +# RUN: llvm-readelf -r -s a.ro | FileCheck %s --check-prefix=RELOC + +# RELOC: Relocation section '.rela.bbb' at offset {{.*}} contains 1 entries: +# RELOC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# RELOC-NEXT: 0000000000000000 0000000000000000 R_X86_64_NONE 0 +# RELOC-EMPTY: +# RELOC-NEXT: Relocation section '.rela.data' at offset {{.*}} contains 4 entries: +# RELOC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# RELOC-NEXT: 0000000000000000 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000008 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000010 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000018 0000000000000001 R_X86_64_64 0 + +# RELOC: Num: Value Size Type Bind Vis Ndx Name +# RELOC-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# RELOC-NEXT: 1: 0000000000000000 0 SECTION LOCAL DEFAULT 1 .text +# RELOC-NEXT: 2: 0000000000000000 0 SECTION LOCAL DEFAULT 2 .bbb +# RELOC-NEXT: 3: 0000000000000000 0 SECTION LOCAL DEFAULT 4 .data +# RELOC-NEXT: 4: 0000000000000000 0 NOTYPE GLOBAL DEFAULT 1 _start +# RELOC-EMPTY: #--- a.s .globl _start From 122064a6303eb9c06e0af231f5a4ce145d9a2e67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 17 Oct 2023 22:49:52 +0300 Subject: [PATCH 381/720] [libcxx] 
[test] Add a test parameter for disabling memory intensive tests (#68214) Specifically, the test std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp allocates a std::string with INT_MAX-1 elements, and then writes this to a std::stringstream. On Linux, running this test consumes around 5.0 GB of memory; on Windows, it ends up using up to 6.8 GB of memory. This limits whether such tests can run on e.g. GitHub Actions runners, where the free runners are limited to 8 GB of memory. This is somewhat similar to, but still notably different, from the existing test parameter long_tests. --- .../stringstream/stringstream.members/gcount.pass.cpp | 1 + libcxx/utils/libcxx/test/params.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp index 3a5edac6c58b4..8dc74421e7895 100644 --- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: 32-bit-pointer +// REQUIRES: large_tests // Test that tellp() does not break the stringstream after INT_MAX, due to use // of pbump() that accept int. diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index c3732560f5e46..e34fd0387f4f5 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -276,6 +276,14 @@ def getStdFlag(cfg, std): help="Whether to enable tests that take longer to run. 
This can be useful when running on a very slow device.", actions=lambda enabled: [] if not enabled else [AddFeature("long_tests")], ), + Parameter( + name="large_tests", + choices=[True, False], + type=bool, + default=True, + help="Whether to enable tests that use a lot of memory. This can be useful when running on a device with limited amounts of memory.", + actions=lambda enabled: [] if not enabled else [AddFeature("large_tests")], + ), Parameter( name="hardening_mode", choices=["unchecked", "hardened", "safe", "debug"], From 9922aadf9e9d1b9d10dd69882d8515757f127a91 Mon Sep 17 00:00:00 2001 From: Shraiysh Date: Tue, 17 Oct 2023 15:00:39 -0500 Subject: [PATCH 382/720] [OpenMPIRBuilder] Added `if` clause for `teams` (#69139) This patch adds support for the `if` clause on `teams` construct. The value of the argument must be an integer value. If the value evaluates to true (non-zero) integer, then the number of threads is determined by `num_threads` clause (or default and ICV if `num_threads` is absent). When the condition evaluates to false (zero), then the bounds are set to 1. ([OpenMP 5.2 Section 10.2](https://www.openmp.org/spec-html/5.2/openmpse58.html)) This essentially means that ``` upperbound = ifexpr ? upperbound : 1 lowerbound = ifexpr ? lowerbound : 1 ``` --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 11 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 21 ++- .../Frontend/OpenMPIRBuilderTest.cpp | 146 +++++++++++++++++- 3 files changed, 165 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 9d2adf229b786..00b4707a7f820 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1923,11 +1923,12 @@ class OpenMPIRBuilder { /// \param NumTeamsUpper Upper bound on the number of teams. /// \param ThreadLimit on the number of threads that may participate in a /// contention group created by each team. 
- InsertPointTy createTeams(const LocationDescription &Loc, - BodyGenCallbackTy BodyGenCB, - Value *NumTeamsLower = nullptr, - Value *NumTeamsUpper = nullptr, - Value *ThreadLimit = nullptr); + /// \param IfExpr is the integer argument value of the if condition on the + /// teams clause. + InsertPointTy + createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, + Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr, + Value *ThreadLimit = nullptr, Value *IfExpr = nullptr); /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index a658990f2d453..5b24e9fe2e0c5 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -5734,7 +5734,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower, - Value *NumTeamsUpper, Value *ThreadLimit) { + Value *NumTeamsUpper, Value *ThreadLimit, + Value *IfExpr) { if (!updateToLocation(Loc)) return InsertPointTy(); @@ -5773,7 +5774,7 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, splitBB(Builder, /*CreateBranch=*/true, "teams.alloca"); // Push num_teams - if (NumTeamsLower || NumTeamsUpper || ThreadLimit) { + if (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr) { assert((NumTeamsLower == nullptr || NumTeamsUpper != nullptr) && "if lowerbound is non-null, then upperbound must also be non-null " "for bounds on num_teams"); @@ -5784,6 +5785,22 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, if (NumTeamsLower == nullptr) NumTeamsLower = NumTeamsUpper; + if (IfExpr) { + assert(IfExpr->getType()->isIntegerTy() && + "argument to if clause must be an integer value"); + + // upper 
= ifexpr ? upper : 1 + if (IfExpr->getType() != Int1) + IfExpr = Builder.CreateICmpNE(IfExpr, + ConstantInt::get(IfExpr->getType(), 0)); + NumTeamsUpper = Builder.CreateSelect( + IfExpr, NumTeamsUpper, Builder.getInt32(1), "numTeamsUpper"); + + // lower = ifexpr ? lower : 1 + NumTeamsLower = Builder.CreateSelect( + IfExpr, NumTeamsLower, Builder.getInt32(1), "numTeamsLower"); + } + if (ThreadLimit == nullptr) ThreadLimit = Builder.getInt32(0); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index d770facc17302..97cfc339675f6 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -4033,7 +4033,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) { }; OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); - Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB)); + Builder.restoreIP(OMPBuilder.createTeams( + Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr, + /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr)); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -4095,7 +4097,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) { Builder.restoreIP(OMPBuilder.createTeams(/*=*/Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr, - /*ThreadLimit=*/F->arg_begin())); + /*ThreadLimit=*/F->arg_begin(), + /*IfExpr=*/nullptr)); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -4144,7 +4147,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) { // `num_teams` Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, - /*NumTeamsUpper=*/F->arg_begin())); + /*NumTeamsUpper=*/F->arg_begin(), + /*ThreadLimit=*/nullptr, + /*IfExpr=*/nullptr)); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -4197,7 +4202,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) { // `F` already has an integer argument, so we use that as upper bound to // `num_teams` 
Builder.restoreIP( - OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper)); + OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, + /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr)); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -4255,8 +4261,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) { }; OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); - Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, - NumTeamsUpper, ThreadLimit)); + Builder.restoreIP(OMPBuilder.createTeams( + Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, nullptr)); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -4284,6 +4290,134 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) { OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); } +TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> &Builder = OMPBuilder.Builder; + Builder.SetInsertPoint(BB); + + Value *IfExpr = Builder.CreateLoad(Builder.getInt1Ty(), + Builder.CreateAlloca(Builder.getInt1Ty())); + + Function *FakeFunction = + Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::ExternalLinkage, "fakeFunction", M.get()); + + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + Builder.restoreIP(CodeGenIP); + Builder.CreateCall(FakeFunction, {}); + }; + + // `F` already has an integer argument, so we use that as upper bound to + // `num_teams` + Builder.restoreIP(OMPBuilder.createTeams( + Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr, + /*ThreadLimit=*/nullptr, IfExpr)); + + Builder.CreateRetVoid(); + OMPBuilder.finalize(); + + ASSERT_FALSE(verifyModule(*M)); + + CallInst *PushNumTeamsCallInst = + findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 
+ ASSERT_NE(PushNumTeamsCallInst, nullptr); + Value *NumTeamsLower = PushNumTeamsCallInst->getArgOperand(2); + Value *NumTeamsUpper = PushNumTeamsCallInst->getArgOperand(3); + Value *ThreadLimit = PushNumTeamsCallInst->getArgOperand(4); + + // Check the lower_bound + ASSERT_NE(NumTeamsLower, nullptr); + SelectInst *NumTeamsLowerSelectInst = dyn_cast(NumTeamsLower); + ASSERT_NE(NumTeamsLowerSelectInst, nullptr); + EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExpr); + EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), Builder.getInt32(0)); + EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1)); + + // Check the upper_bound + ASSERT_NE(NumTeamsUpper, nullptr); + SelectInst *NumTeamsUpperSelectInst = dyn_cast(NumTeamsUpper); + ASSERT_NE(NumTeamsUpperSelectInst, nullptr); + EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExpr); + EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), Builder.getInt32(0)); + EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1)); + + // Check thread_limit + EXPECT_EQ(ThreadLimit, Builder.getInt32(0)); +} + +TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> &Builder = OMPBuilder.Builder; + Builder.SetInsertPoint(BB); + + Value *IfExpr = Builder.CreateLoad( + Builder.getInt32Ty(), Builder.CreateAlloca(Builder.getInt32Ty())); + Value *NumTeamsLower = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5)); + Value *NumTeamsUpper = + Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10)); + Value *ThreadLimit = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20)); + + Function *FakeFunction = + Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::ExternalLinkage, "fakeFunction", M.get()); + + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + Builder.restoreIP(CodeGenIP); + 
Builder.CreateCall(FakeFunction, {}); + }; + + // `F` already has an integer argument, so we use that as upper bound to + // `num_teams` + Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, + NumTeamsUpper, ThreadLimit, IfExpr)); + + Builder.CreateRetVoid(); + OMPBuilder.finalize(); + + ASSERT_FALSE(verifyModule(*M)); + + CallInst *PushNumTeamsCallInst = + findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); + ASSERT_NE(PushNumTeamsCallInst, nullptr); + Value *NumTeamsLowerArg = PushNumTeamsCallInst->getArgOperand(2); + Value *NumTeamsUpperArg = PushNumTeamsCallInst->getArgOperand(3); + Value *ThreadLimitArg = PushNumTeamsCallInst->getArgOperand(4); + + // Get the boolean conversion of if expression + ASSERT_EQ(IfExpr->getNumUses(), 1U); + User *IfExprInst = IfExpr->user_back(); + ICmpInst *IfExprCmpInst = dyn_cast(IfExprInst); + ASSERT_NE(IfExprCmpInst, nullptr); + EXPECT_EQ(IfExprCmpInst->getPredicate(), ICmpInst::Predicate::ICMP_NE); + EXPECT_EQ(IfExprCmpInst->getOperand(0), IfExpr); + EXPECT_EQ(IfExprCmpInst->getOperand(1), Builder.getInt32(0)); + + // Check the lower_bound + ASSERT_NE(NumTeamsLowerArg, nullptr); + SelectInst *NumTeamsLowerSelectInst = dyn_cast(NumTeamsLowerArg); + ASSERT_NE(NumTeamsLowerSelectInst, nullptr); + EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExprCmpInst); + EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), NumTeamsLower); + EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1)); + + // Check the upper_bound + ASSERT_NE(NumTeamsUpperArg, nullptr); + SelectInst *NumTeamsUpperSelectInst = dyn_cast(NumTeamsUpperArg); + ASSERT_NE(NumTeamsUpperSelectInst, nullptr); + EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExprCmpInst); + EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), NumTeamsUpper); + EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1)); + + // Check thread_limit + EXPECT_EQ(ThreadLimitArg, ThreadLimit); +} + /// Returns the single 
instruction of InstTy type in BB that uses the value V. /// If there is more than one such instruction, returns null. template From d4088e7d5f4849a4385a568b675d8c99c986d581 Mon Sep 17 00:00:00 2001 From: Yinying Li <107574043+yinying-lisa-li@users.noreply.github.com> Date: Tue, 17 Oct 2023 16:09:39 -0400 Subject: [PATCH 383/720] [mlir][sparse] Populate lvlToDim (#68937) Updates: 1. Infer lvlToDim from dimToLvl 2. Add more tests for block sparsity 3. Finish TODOs related to lvlToDim, including adding lvlToDim to python binding Verification of lvlToDim that user provides will be implemented in the next PR. --- mlir/include/mlir-c/Dialect/SparseTensor.h | 3 +- .../Dialect/SparseTensor/IR/SparseTensor.h | 13 ++++ .../SparseTensor/IR/SparseTensorAttrDefs.td | 3 + .../Bindings/Python/DialectSparseTensor.cpp | 17 +++- mlir/lib/CAPI/Dialect/SparseTensor.cpp | 7 +- .../SparseTensor/IR/SparseTensorDialect.cpp | 77 ++++++++++++++++++- mlir/test/CAPI/sparse_tensor.c | 5 +- .../SparseTensor/roundtrip_encoding.mlir | 52 +++++++++++++ .../Dialect/SparseTensor/python/test_SDDMM.py | 2 +- .../Dialect/SparseTensor/python/test_SpMM.py | 2 +- .../SparseTensor/python/test_output.py | 2 +- .../SparseTensor/python/test_stress.py | 2 +- .../python/dialects/sparse_tensor/dialect.py | 14 +++- 13 files changed, 177 insertions(+), 22 deletions(-) diff --git a/mlir/include/mlir-c/Dialect/SparseTensor.h b/mlir/include/mlir-c/Dialect/SparseTensor.h index 7e47e54e7361d..859a4f0dd9f52 100644 --- a/mlir/include/mlir-c/Dialect/SparseTensor.h +++ b/mlir/include/mlir-c/Dialect/SparseTensor.h @@ -51,11 +51,10 @@ MLIR_CAPI_EXPORTED bool mlirAttributeIsASparseTensorEncodingAttr(MlirAttribute attr); /// Creates a `sparse_tensor.encoding` attribute with the given parameters. 
-/// TODO: add a version that supplied lvlToDim when it cannot be inferred MLIR_CAPI_EXPORTED MlirAttribute mlirSparseTensorEncodingAttrGet( MlirContext ctx, intptr_t lvlRank, enum MlirSparseTensorDimLevelType const *lvlTypes, MlirAffineMap dimToLvl, - int posWidth, int crdWidth); + MlirAffineMap lvlTodim, int posWidth, int crdWidth); /// Returns the level-rank of the `sparse_tensor.encoding` attribute. MLIR_CAPI_EXPORTED intptr_t diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h index cbca0a7f8cc0e..6e834426b4417 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h @@ -160,6 +160,19 @@ inline bool hasAnySparseOperandOrResult(Operation *op) { return hasAnySparseOperand(op) || hasAnySparseResult(op); } +// +// Inference. +// + +/// Given the dimToLvl map, infers the lvlToDim map, or returns +/// empty Affine map when inference fails. +AffineMap inferLvlToDim(AffineMap dimToLvl, MLIRContext *context); + +/// Returns the lvlToDim map for the given dimToLvl map specific +/// to the block sparse cases. +/// Asserts on failure (so only use when known to succeed). +AffineMap inverseBlockSparsity(AffineMap dimToLvl, MLIRContext *context); + // // Reordering. 
// diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td index 38c7200afb41f..47fd18a689d5a 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td @@ -307,6 +307,9 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding", "AffineMap":$lvlToDim, "unsigned":$posWidth, "unsigned":$crdWidth), [{ + if (!lvlToDim) { + lvlToDim = ::mlir::sparse_tensor::inferLvlToDim(dimToLvl, $_ctxt); + } return $_get($_ctxt, lvlTypes, dimToLvl, lvlToDim, posWidth, crdWidth, ArrayRef<::mlir::sparse_tensor::SparseTensorDimSliceAttr>{}); }]> diff --git a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp index 8e9e0b6baf76c..9bde3a443ecfe 100644 --- a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp +++ b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp @@ -41,16 +41,17 @@ static void populateDialectSparseTensorSubmodule(const py::module &m) { .def_classmethod( "get", [](py::object cls, std::vector lvlTypes, - std::optional dimToLvl, int posWidth, int crdWidth, + std::optional dimToLvl, + std::optional lvlToDim, int posWidth, int crdWidth, MlirContext context) { - // TODO: provide dimToLvl return cls(mlirSparseTensorEncodingAttrGet( context, lvlTypes.size(), lvlTypes.data(), - dimToLvl ? *dimToLvl : MlirAffineMap{nullptr}, posWidth, + dimToLvl ? *dimToLvl : MlirAffineMap{nullptr}, + lvlToDim ? 
*lvlToDim : MlirAffineMap{nullptr}, posWidth, crdWidth)); }, py::arg("cls"), py::arg("lvl_types"), py::arg("dim_to_lvl"), - py::arg("pos_width"), py::arg("crd_width"), + py::arg("lvl_to_dim"), py::arg("pos_width"), py::arg("crd_width"), py::arg("context") = py::none(), "Gets a sparse_tensor.encoding from parameters.") .def_property_readonly( @@ -71,6 +72,14 @@ static void populateDialectSparseTensorSubmodule(const py::module &m) { return {}; return ret; }) + .def_property_readonly( + "lvl_to_dim", + [](MlirAttribute self) -> std::optional { + MlirAffineMap ret = mlirSparseTensorEncodingAttrGetLvlToDim(self); + if (mlirAffineMapIsNull(ret)) + return {}; + return ret; + }) .def_property_readonly("pos_width", mlirSparseTensorEncodingAttrGetPosWidth) .def_property_readonly("crd_width", diff --git a/mlir/lib/CAPI/Dialect/SparseTensor.cpp b/mlir/lib/CAPI/Dialect/SparseTensor.cpp index bf3a4ad5e7a16..c3ad95527df48 100644 --- a/mlir/lib/CAPI/Dialect/SparseTensor.cpp +++ b/mlir/lib/CAPI/Dialect/SparseTensor.cpp @@ -48,15 +48,14 @@ bool mlirAttributeIsASparseTensorEncodingAttr(MlirAttribute attr) { MlirAttribute mlirSparseTensorEncodingAttrGet(MlirContext ctx, intptr_t lvlRank, MlirSparseTensorDimLevelType const *lvlTypes, - MlirAffineMap dimToLvl, int posWidth, - int crdWidth) { + MlirAffineMap dimToLvl, MlirAffineMap lvlToDim, + int posWidth, int crdWidth) { SmallVector cppLvlTypes; cppLvlTypes.reserve(lvlRank); for (intptr_t l = 0; l < lvlRank; ++l) cppLvlTypes.push_back(static_cast(lvlTypes[l])); - mlir::AffineMap lvlToDim; // TODO: provide in API return wrap(SparseTensorEncodingAttr::get(unwrap(ctx), cppLvlTypes, - unwrap(dimToLvl), lvlToDim, + unwrap(dimToLvl), unwrap(lvlToDim), posWidth, crdWidth)); } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index cd1e585438dda..fd87bbfa905ed 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ 
b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -293,9 +293,8 @@ Type SparseTensorEncodingAttr::getCrdType() const { SparseTensorEncodingAttr SparseTensorEncodingAttr::withDimToLvl(AffineMap dimToLvl) const { assert(getImpl() && "Uninitialized SparseTensorEncodingAttr"); - // TODO: infer lvlToDim return SparseTensorEncodingAttr::get(getContext(), getLvlTypes(), dimToLvl, - /*lvlToDim*/ AffineMap(), getPosWidth(), + getLvlToDim(), getPosWidth(), getCrdWidth()); } @@ -583,7 +582,8 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { #undef RETURN_ON_FAIL // Construct struct-like storage for attribute. - AffineMap lvlToDim; // TODO: infer + // TODO: Fetch lvlToDim if user provides one + AffineMap lvlToDim = inferLvlToDim(dimToLvl, parser.getContext()); return parser.getChecked( parser.getContext(), lvlTypes, dimToLvl, lvlToDim, posWidth, crdWidth, dimSlices); @@ -749,6 +749,75 @@ mlir::sparse_tensor::getSparseTensorEncoding(Type type) { return nullptr; } +AffineMap mlir::sparse_tensor::inferLvlToDim(AffineMap dimToLvl, + MLIRContext *context) { + auto map = static_cast(dimToLvl); + AffineMap lvlToDim; + // Return an empty lvlToDim when inference is not successful. + if (!map || map.getNumSymbols() != 0) { + lvlToDim = AffineMap(); + } else if (map.isPermutation()) { + lvlToDim = inversePermutation(map); + } else { + // TODO: check if it's block sparsity + lvlToDim = inverseBlockSparsity(map, context); + } + return lvlToDim; +} + +AffineMap mlir::sparse_tensor::inverseBlockSparsity(AffineMap dimToLvl, + MLIRContext *context) { + SmallVector lvlExprs; + auto numLvls = dimToLvl.getNumResults(); + lvlExprs.reserve(numLvls); + // lvlExprComponents stores information of the floordiv and mod operations + // applied to the same dimension, so as to build the lvlToDim map. 
+ std::map> lvlExprComponents; + for (unsigned i = 0, n = numLvls; i < n; i++) { + auto result = dimToLvl.getResult(i); + if (auto binOp = result.dyn_cast()) { + if (result.getKind() == AffineExprKind::FloorDiv) { + // Position of the dimension in dimToLvl. + auto pos = binOp.getLHS().dyn_cast().getPosition(); + assert(lvlExprComponents.find(pos) == lvlExprComponents.end() && + "expected only one floordiv for each dimension"); + SmallVector components; + // Level variable for floordiv. + components.push_back(getAffineDimExpr(i, context)); + // Multiplier. + components.push_back(binOp.getRHS()); + // Map key is the position of the dimension. + lvlExprComponents[pos] = components; + } else if (result.getKind() == AffineExprKind::Mod) { + auto pos = binOp.getLHS().dyn_cast().getPosition(); + assert(lvlExprComponents.find(pos) != lvlExprComponents.end() && + "expected floordiv before mod"); + // Add level variable for mod to the same vector + // of the corresponding floordiv. + lvlExprComponents[pos].push_back(getAffineDimExpr(i, context)); + } else { + assert(false && "expected floordiv or mod"); + } + } else { + lvlExprs.push_back(getAffineDimExpr(i, context)); + } + } + // Build lvlExprs from lvlExprComponents. + // For example, for il = i floordiv 2 and ii = i mod 2, the components + // would be [il, 2, ii]. It could be used to build the AffineExpr + // i = il * 2 + ii in lvlToDim. 
+ for (auto &components : lvlExprComponents) { + assert(components.second.size() == 3 && + "expected 3 components to build lvlExprs"); + auto mulOp = getAffineBinaryOpExpr( + AffineExprKind::Mul, components.second[0], components.second[1]); + auto addOp = + getAffineBinaryOpExpr(AffineExprKind::Add, mulOp, components.second[2]); + lvlExprs.push_back(addOp); + } + return dimToLvl.get(dimToLvl.getNumResults(), 0, lvlExprs, context); +} + bool mlir::sparse_tensor::isCOOType(SparseTensorEncodingAttr enc, Level startLvl, bool isUnique) { if (!enc || @@ -811,7 +880,7 @@ RankedTensorType sparse_tensor::getCOOFromTypeWithOrdering(RankedTensorType rtt, // default value. unsigned posWidth = src.getPosWidth(); unsigned crdWidth = src.getCrdWidth(); - AffineMap invPerm; // TODO + AffineMap invPerm = src.getLvlToDim(); auto enc = SparseTensorEncodingAttr::get(src.getContext(), lvlTypes, lvlPerm, invPerm, posWidth, crdWidth); return RankedTensorType::get(src.getDimShape(), src.getElementType(), enc); diff --git a/mlir/test/CAPI/sparse_tensor.c b/mlir/test/CAPI/sparse_tensor.c index 33ee8e784096a..3bd1508cf299a 100644 --- a/mlir/test/CAPI/sparse_tensor.c +++ b/mlir/test/CAPI/sparse_tensor.c @@ -40,6 +40,8 @@ static int testRoundtripEncoding(MlirContext ctx) { // CHECK: level_type: 4 // CHECK: level_type: 8 // CHECK: level_type: 8 + MlirAffineMap lvlToDim = + mlirSparseTensorEncodingAttrGetLvlToDim(originalAttr); int lvlRank = mlirSparseTensorEncodingGetLvlRank(originalAttr); enum MlirSparseTensorDimLevelType *lvlTypes = malloc(sizeof(enum MlirSparseTensorDimLevelType) * lvlRank); @@ -53,9 +55,8 @@ static int testRoundtripEncoding(MlirContext ctx) { // CHECK: crdWidth: 64 int crdWidth = mlirSparseTensorEncodingAttrGetCrdWidth(originalAttr); fprintf(stderr, "crdWidth: %d\n", crdWidth); - // TODO: lvlToDim MlirAttribute newAttr = mlirSparseTensorEncodingAttrGet( - ctx, lvlRank, lvlTypes, dimToLvl, posWidth, crdWidth); + ctx, lvlRank, lvlTypes, dimToLvl, lvlToDim, posWidth, 
crdWidth); mlirAttributeDump(newAttr); // For debugging filecheck output. // CHECK: equal: 1 fprintf(stderr, "equal: %d\n", mlirAttributeEqual(originalAttr, newAttr)); diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir index ae3805d8b7741..ea8217ab6e3f2 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir @@ -160,6 +160,24 @@ func.func private @BSR(%arg0: tensor) { // ----- +#BCSR = #sparse_tensor.encoding<{ + map = ( i, j, k ) -> + ( i floordiv 2 : dense, + j floordiv 3 : dense, + k floordiv 4 : compressed, + i mod 2 : dense, + j mod 3 : dense, + k mod 4 : dense + ) +}> + +// CHECK-LABEL: func private @BCSR( +// CHECK-SAME: tensor (d0 floordiv 2 : dense, d1 floordiv 3 : dense, d2 floordiv 4 : compressed, d0 mod 2 : dense, d1 mod 3 : dense, d2 mod 4 : dense) }>> +func.func private @BCSR(%arg0: tensor) { + return +} +// ----- + #BSR_explicit = #sparse_tensor.encoding<{ map = {il, jl, ii, jj} @@ -194,3 +212,37 @@ func.func private @BSR_explicit(%arg0: tensor) { func.func private @NV_24(%arg0: tensor) { return } + +// ----- + +#NV_24 = #sparse_tensor.encoding<{ + map = ( i, j, k ) -> + ( i : dense, + j : dense, + k floordiv 4 : dense, + k mod 4 : block2_4 + ) +}> + +// CHECK-LABEL: func private @NV_24( +// CHECK-SAME: tensor (d0 : dense, d1 : dense, d2 floordiv 4 : dense, d2 mod 4 : block2_4) }>> +func.func private @NV_24(%arg0: tensor) { + return +} + +// ----- + +#NV_24 = #sparse_tensor.encoding<{ + map = ( i, j, k ) -> + ( i : dense, + k floordiv 4 : dense, + j : dense, + k mod 4 : block2_4 + ) +}> + +// CHECK-LABEL: func private @NV_24( +// CHECK-SAME: tensor (d0 : dense, d2 floordiv 4 : dense, d1 : dense, d2 mod 4 : block2_4) }>> +func.func private @NV_24(%arg0: tensor) { + return +} \ No newline at end of file diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py 
b/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py index 0cdc7c88bd97f..1f9b636038318 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py +++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py @@ -155,7 +155,7 @@ def main(): for iwidth in [32]: for e in [True]: attr = st.EncodingAttr.get( - level, ordering, pwidth, iwidth + level, ordering, None, pwidth, iwidth ) opt = f"parallelization-strategy=none" compiler = sparse_compiler.SparseCompiler( diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py index 01d74a4dc82fa..69f6cdcea967f 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py +++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py @@ -145,7 +145,7 @@ def main(): for pwidth in bitwidths: for iwidth in bitwidths: attr = st.EncodingAttr.get( - level, ordering, pwidth, iwidth + level, ordering, None, pwidth, iwidth ) build_compile_and_run_SpMM(attr, compiler) count = count + 1 diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py index 8f3f4e5af1e58..7d77490080205 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py +++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py @@ -91,7 +91,7 @@ def main(): for level in levels: for ordering in orderings: for bwidth in bitwidths: - attr = st.EncodingAttr.get(level, ordering, bwidth, bwidth) + attr = st.EncodingAttr.get(level, ordering, None, bwidth, bwidth) build_compile_and_run_output(attr, compiler) count = count + 1 diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py index ef266672ce42a..841b02bc10c8b 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py +++ 
b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py @@ -227,7 +227,7 @@ def main(): for pwidth in bitwidths: for iwidth in bitwidths: attr = st.EncodingAttr.get( - level, ordering, pwidth, iwidth + level, ordering, None, pwidth, iwidth ) types.append(ir.RankedTensorType.get(shape, f64, attr)) # diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index d80b878323377..240db6ebd1d1e 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -32,12 +32,14 @@ def testEncodingAttr1D(): print(f"lvl_types: {casted.lvl_types}") # CHECK: dim_to_lvl: None print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: lvl_to_dim: None + print(f"lvl_to_dim: {casted.lvl_to_dim}") # CHECK: pos_width: 16 print(f"pos_width: {casted.pos_width}") # CHECK: crd_width: 32 print(f"crd_width: {casted.crd_width}") - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + created = st.EncodingAttr.get(casted.lvl_types, None, None, 0, 0) # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> print(created) # CHECK: created_equal: False @@ -72,12 +74,20 @@ def testEncodingAttr2D(): print(f"lvl_types: {casted.lvl_types}") # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: lvl_to_dim: (d0, d1) -> (d1, d0) + print(f"lvl_to_dim: {casted.lvl_to_dim}") # CHECK: pos_width: 8 print(f"pos_width: {casted.pos_width}") # CHECK: crd_width: 32 print(f"crd_width: {casted.crd_width}") - created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + created = st.EncodingAttr.get( + casted.lvl_types, + casted.dim_to_lvl, + casted.lvl_to_dim, + 8, + 32, + ) # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> print(created) # CHECK: created_equal: True From e9b9a1d3202d86d9eb2b49c6463fde0f15f9dc94 Mon Sep 17 00:00:00 2001 From: Fangrui Song 
Date: Tue, 17 Oct 2023 13:16:50 -0700 Subject: [PATCH 384/720] [ELF] Move demoteSymbols to Writer.cpp. NFC History of demoteSharedSymbols: * https://reviews.llvm.org/D45536 demotes SharedSymbol * https://reviews.llvm.org/D111365 demotes lazy symbols * The pending #69295 will demote symbols defined in discarded sections The pass is placed after markLive just to be clear that it needs `isNeeded` information computed by markLive. The remaining passes in Driver.cpp do not use symbol information. Move the pass to Writer.cpp to be closer to other symbol-related passes. --- lld/ELF/Driver.cpp | 19 ------------------- lld/ELF/Writer.cpp | 28 +++++++++++++++++++++++----- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index d082463d34e57..5f88389a58408 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2248,24 +2248,6 @@ static void replaceCommonSymbols() { } } -// If all references to a DSO happen to be weak, the DSO is not added to -// DT_NEEDED. If that happens, replace ShardSymbol with Undefined to avoid -// dangling references to an unneeded DSO. Use a weak binding to avoid -// --no-allow-shlib-undefined diagnostics. Similarly, demote lazy symbols. -static void demoteSharedAndLazySymbols() { - llvm::TimeTraceScope timeScope("Demote shared and lazy symbols"); - for (Symbol *sym : symtab.getSymbols()) { - auto *s = dyn_cast(sym); - if (!(s && !cast(s->file)->isNeeded) && !sym->isLazy()) - continue; - - uint8_t binding = sym->isLazy() ? sym->binding : uint8_t(STB_WEAK); - Undefined(nullptr, sym->getName(), binding, sym->stOther, sym->type) - .overwrite(*sym); - sym->versionId = VER_NDX_GLOBAL; - } -} - // The section referred to by `s` is considered address-significant. Set the // keepUnique flag on the section if appropriate. 
static void markAddrsig(Symbol *s) { @@ -3023,7 +3005,6 @@ void LinkerDriver::link(opt::InputArgList &args) { // Garbage collection and removal of shared symbols from unused shared objects. invokeELFT(markLive,); - demoteSharedAndLazySymbols(); // Make copies of any input sections that need to be copied into each // partition. diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 5077c972658a1..5fc4412aa49f1 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -251,6 +251,23 @@ void elf::addReservedSymbols() { ElfSym::edata2 = add("_edata", -1); } +// If all references to a DSO happen to be weak, the DSO is not added to +// DT_NEEDED. If that happens, replace ShardSymbol with Undefined to avoid +// dangling references to an unneeded DSO. Use a weak binding to avoid +// --no-allow-shlib-undefined diagnostics. Similarly, demote lazy symbols. +static void demoteSymbols() { + llvm::TimeTraceScope timeScope("Demote symbols"); + for (Symbol *sym : symtab.getSymbols()) { + auto *s = dyn_cast(sym); + if (!(s && !cast(s->file)->isNeeded) && !sym->isLazy()) + continue; + uint8_t binding = sym->isLazy() ? 
sym->binding : uint8_t(STB_WEAK); + Undefined(nullptr, sym->getName(), binding, sym->stOther, sym->type) + .overwrite(*sym); + sym->versionId = VER_NDX_GLOBAL; + } +} + // Fully static executables don't support MTE globals at this point in time, as // we currently rely on: // - A dynamic loader to process relocations, and @@ -1935,12 +1952,13 @@ template void Writer::finalizeSections() { for (Partition &part : partitions) finalizeSynthetic(part.ehFrame.get()); } + } - if (config->hasDynSymTab) { - parallelForEach(symtab.getSymbols(), [](Symbol *sym) { - sym->isPreemptible = computeIsPreemptible(*sym); - }); - } + demoteSymbols(); + if (config->hasDynSymTab) { + parallelForEach(symtab.getSymbols(), [](Symbol *sym) { + sym->isPreemptible = computeIsPreemptible(*sym); + }); } // Change values of linker-script-defined symbols from placeholders (assigned From 3472d4d4c311bfed6fc316fb5834bfb50d409421 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 17 Oct 2023 13:50:01 -0700 Subject: [PATCH 385/720] [MLIR][Doc] Prepend "Variadic of" in front of variadic operands (#69285) Table of Operands for operations like: https://mlir.llvm.org/docs/Dialects/MemRef/#operands-6 Don't distinguish variadic ODS operands from others right now. 
After this change, it'll print: | Operand | Description | | dynamicSizes | Variadic of index | instead of: | Operand | Description | | dynamicSizes | index | --- flang/test/Fir/invalid.fir | 2 +- flang/test/HLFIR/invalid.fir | 4 ++-- mlir/include/mlir/IR/CommonTypeConstraints.td | 3 ++- mlir/test/Dialect/Affine/invalid.mlir | 2 +- mlir/test/Dialect/LLVMIR/invalid.mlir | 2 +- mlir/test/Dialect/Linalg/invalid.mlir | 2 +- mlir/test/IR/operand.mlir | 4 ++-- mlir/test/IR/result.mlir | 4 ++-- 8 files changed, 12 insertions(+), 11 deletions(-) diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir index c3bfb6922deda..824aeec28b417 100644 --- a/flang/test/Fir/invalid.fir +++ b/flang/test/Fir/invalid.fir @@ -690,7 +690,7 @@ func.func @bad_array_modify(%arr1 : !fir.ref>, %m : index, % func.func @slice_must_be_integral() { %0 = arith.constant 42 : i32 %1 = fir.field_index field, !fir.type (%0 : i32) - // expected-error@+1 {{'fir.slice' op operand #0 must be any integer, but got '!fir.field'}} + // expected-error@+1 {{'fir.slice' op operand #0 must be variadic of any integer, but got '!fir.field'}} %2 = fir.slice %1, %1, %1 : (!fir.field, !fir.field, !fir.field) -> !fir.slice<1> return } diff --git a/flang/test/HLFIR/invalid.fir b/flang/test/HLFIR/invalid.fir index 49b6c1852b598..09165f09766b9 100644 --- a/flang/test/HLFIR/invalid.fir +++ b/flang/test/HLFIR/invalid.fir @@ -267,7 +267,7 @@ func.func @bad_concat(%arg0: !fir.ref>, %arg1: !fir.ref>>, %arg1: !fir.ref>>) { %c30 = arith.constant 30 : index - // expected-error@+1 {{'hlfir.concat' op operand #0 must be any character scalar type, but got '!fir.ref>>'}} + // expected-error@+1 {{'hlfir.concat' op operand #0 must be variadic of any character scalar type, but got '!fir.ref>>'}} %0 = hlfir.concat %arg0, %arg1 len %c30 : (!fir.ref>>, !fir.ref>>, index) -> (!hlfir.expr<100x!fir.char<1,30>>) return } @@ -275,7 +275,7 @@ func.func @bad_concat_2(%arg0: !fir.ref>>, %arg1: // ----- func.func @bad_concat_3(%arg0: 
!fir.ref>, %arg1: !fir.ref) { %c30 = arith.constant 30 : index - // expected-error@+1 {{'hlfir.concat' op operand #1 must be any character scalar type, but got '!fir.ref'}} + // expected-error@+1 {{'hlfir.concat' op operand #1 must be variadic of any character scalar type, but got '!fir.ref'}} %0 = hlfir.concat %arg0, %arg1 len %c30 : (!fir.ref>, !fir.ref, index) -> (!hlfir.expr>) return } diff --git a/mlir/include/mlir/IR/CommonTypeConstraints.td b/mlir/include/mlir/IR/CommonTypeConstraints.td index 59249349921a3..b0b5348baaad9 100644 --- a/mlir/include/mlir/IR/CommonTypeConstraints.td +++ b/mlir/include/mlir/IR/CommonTypeConstraints.td @@ -117,7 +117,8 @@ class DialectType : TypeConstraint : TypeConstraint { Type baseType = type; int minSize = 0; diff --git a/mlir/test/Dialect/Affine/invalid.mlir b/mlir/test/Dialect/Affine/invalid.mlir index 1bcb6fc4a365d..72864516b459a 100644 --- a/mlir/test/Dialect/Affine/invalid.mlir +++ b/mlir/test/Dialect/Affine/invalid.mlir @@ -5,7 +5,7 @@ func.func @affine_apply_operand_non_index(%arg0 : i32) { // Custom parser automatically assigns all arguments the `index` so we must // use the generic syntax here to exercise the verifier. 
- // expected-error@+1 {{op operand #0 must be index, but got 'i32'}} + // expected-error@+1 {{op operand #0 must be variadic of index, but got 'i32'}} %0 = "affine.apply"(%arg0) {map = affine_map<(d0) -> (d0)>} : (i32) -> (index) return } diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 6f119a140ba3c..2d0a68b8b6c94 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -306,7 +306,7 @@ func.func @call_non_llvm() { // ----- func.func @call_non_llvm_arg(%arg0 : tensor<*xi32>) { - // expected-error@+1 {{'llvm.call' op operand #0 must be LLVM dialect-compatible type}} + // expected-error@+1 {{'llvm.call' op operand #0 must be variadic of LLVM dialect-compatible type}} "llvm.call"(%arg0) : (tensor<*xi32>) -> () llvm.return } diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 09acce04cd6a1..56890df3f3ee5 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -345,7 +345,7 @@ func.func @illegal_fill_memref_with_tensor_return func.func @illegal_fill_tensor_with_memref_return (%arg0 : tensor, %arg1 : f32) -> memref { - // expected-error @+1 {{result #0 must be ranked tensor of any type values, but got 'memref'}} + // expected-error @+1 {{result #0 must be variadic of ranked tensor of any type values, but got 'memref'}} %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : tensor) -> memref return %0 : memref } diff --git a/mlir/test/IR/operand.mlir b/mlir/test/IR/operand.mlir index e44133f6baeef..507e37c775c0b 100644 --- a/mlir/test/IR/operand.mlir +++ b/mlir/test/IR/operand.mlir @@ -13,7 +13,7 @@ func.func @correct_variadic_operand(%arg0: tensor, %arg1: f32) { // ----- func.func @error_in_first_variadic_operand(%arg0: tensor, %arg1: f32) { - // expected-error @+1 {{operand #1 must be tensor of any type}} + // expected-error @+1 {{operand #1 must be variadic of tensor of any type}} 
"test.mixed_normal_variadic_operand"(%arg0, %arg1, %arg0, %arg0, %arg0) : (tensor, f32, tensor, tensor, tensor) -> () return } @@ -29,7 +29,7 @@ func.func @error_in_normal_operand(%arg0: tensor, %arg1: f32) { // ----- func.func @error_in_second_variadic_operand(%arg0: tensor, %arg1: f32) { - // expected-error @+1 {{operand #3 must be tensor of any type}} + // expected-error @+1 {{operand #3 must be variadic of tensor of any type}} "test.mixed_normal_variadic_operand"(%arg0, %arg0, %arg0, %arg1, %arg0) : (tensor, tensor, tensor, f32, tensor) -> () return } diff --git a/mlir/test/IR/result.mlir b/mlir/test/IR/result.mlir index e7d41a50a38f6..1e4eb3bede4c5 100644 --- a/mlir/test/IR/result.mlir +++ b/mlir/test/IR/result.mlir @@ -13,7 +13,7 @@ func.func @correct_variadic_result() -> tensor { // ----- func.func @error_in_first_variadic_result() -> tensor { - // expected-error @+1 {{result #1 must be tensor of any type}} + // expected-error @+1 {{result #1 must be variadic of tensor of any type}} %0:5 = "test.mixed_normal_variadic_result"() : () -> (tensor, f32, tensor, tensor, tensor) return %0#4 : tensor } @@ -29,7 +29,7 @@ func.func @error_in_normal_result() -> tensor { // ----- func.func @error_in_second_variadic_result() -> tensor { - // expected-error @+1 {{result #3 must be tensor of any type}} + // expected-error @+1 {{result #3 must be variadic of tensor of any type}} %0:5 = "test.mixed_normal_variadic_result"() : () -> (tensor, tensor, tensor, f32, tensor) return %0#4 : tensor } From fc5d815d547e534df8fdb997899e0cffc65b9e35 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 17 Oct 2023 13:52:08 -0700 Subject: [PATCH 386/720] [ELF] Merge demoteSymbols and isPreemptible computation. NFC Remove one iteration of symtab and slightly improve the performance. 
--- lld/ELF/Writer.cpp | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 5fc4412aa49f1..1b63a5c20c0bf 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -255,16 +255,19 @@ void elf::addReservedSymbols() { // DT_NEEDED. If that happens, replace ShardSymbol with Undefined to avoid // dangling references to an unneeded DSO. Use a weak binding to avoid // --no-allow-shlib-undefined diagnostics. Similarly, demote lazy symbols. -static void demoteSymbols() { +static void demoteSymbolsAndComputeIsPreemptible() { llvm::TimeTraceScope timeScope("Demote symbols"); for (Symbol *sym : symtab.getSymbols()) { auto *s = dyn_cast(sym); - if (!(s && !cast(s->file)->isNeeded) && !sym->isLazy()) - continue; - uint8_t binding = sym->isLazy() ? sym->binding : uint8_t(STB_WEAK); - Undefined(nullptr, sym->getName(), binding, sym->stOther, sym->type) - .overwrite(*sym); - sym->versionId = VER_NDX_GLOBAL; + if (sym->isLazy() || (s && !cast(s->file)->isNeeded)) { + uint8_t binding = sym->isLazy() ? sym->binding : uint8_t(STB_WEAK); + Undefined(nullptr, sym->getName(), binding, sym->stOther, sym->type) + .overwrite(*sym); + sym->versionId = VER_NDX_GLOBAL; + } + + if (config->hasDynSymTab) + sym->isPreemptible = computeIsPreemptible(*sym); } } @@ -1954,12 +1957,7 @@ template void Writer::finalizeSections() { } } - demoteSymbols(); - if (config->hasDynSymTab) { - parallelForEach(symtab.getSymbols(), [](Symbol *sym) { - sym->isPreemptible = computeIsPreemptible(*sym); - }); - } + demoteSymbolsAndComputeIsPreemptible(); // Change values of linker-script-defined symbols from placeholders (assigned // by declareSymbols) to actual definitions. From e90ec58b132a7244bdd8d45dd482fd78fe487f37 Mon Sep 17 00:00:00 2001 From: Vincent Lee Date: Tue, 17 Oct 2023 14:05:01 -0700 Subject: [PATCH 387/720] [CMake] Support per-target linker flags (#68393) `CMAKE_{C/CXX}_FLAGS` affects all targets in LLVM. 
This can be undesirable in situations, like the case of enabling thinLTO, where `-flto` is added to every source file. In reality, we only care about optimizing a select few of binaries, such as clang or lld, that dominate the compilation pipeline. Auxiliary binaries in a distribution and not on the critical path can be kept non-optimized. This PR adds support of per-target linker flags, which can solve the thinLTO problem by negating the effects of LTO via targeted linker flags on the targets. The example of negating thinLTO above can be done by doing the following: ``` set(LLVM_llvm-dwarfdump_LINKER_FLAGS "-Wl,--lto-O0" CACHE STRING "Custom linker flags to llvm-dwarfdump") set(LLVM_lldb_LINKER_FLAGS "-Wl,--lto-O0" CACHE STRING "Custom linker flags to lldb") ``` There's other applications where this could be used (e.g. avoid optimizing host tools for build speed improvement etc.). I've generalized this so that users can apply their desired flags to targets that are generated by `llvm_add_library` or `add_llvm_executable`. Internally, our toolchain builds were on average 1.4x faster when selectively choosing the binaries that we want optimized. 
--- llvm/cmake/modules/AddLLVM.cmake | 11 +++++++++++ llvm/docs/CMake.rst | 3 +++ 2 files changed, 14 insertions(+) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 93011522e498e..72661594f643f 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -726,6 +726,8 @@ function(llvm_add_library name) endforeach() endif() + add_custom_linker_flags(${name}) + if(ARG_SHARED OR ARG_MODULE) llvm_externalize_debuginfo(${name}) llvm_codesign(${name} ENTITLEMENTS ${ARG_ENTITLEMENTS} BUNDLE_PATH ${ARG_BUNDLE_PATH}) @@ -1019,6 +1021,8 @@ macro(add_llvm_executable name) endforeach() endif( LLVM_COMMON_DEPENDS ) + add_custom_linker_flags(${name}) + if(NOT ARG_IGNORE_EXTERNALIZE_DEBUGINFO) llvm_externalize_debuginfo(${name}) endif() @@ -1524,6 +1528,13 @@ macro(add_llvm_tool_subdirectory name) add_llvm_external_project(${name}) endmacro(add_llvm_tool_subdirectory) +macro(add_custom_linker_flags name) + if (LLVM_${name}_LINKER_FLAGS) + message(STATUS "Applying ${LLVM_${name}_LINKER_FLAGS} to ${name}") + target_link_options(${name} PRIVATE ${LLVM_${name}_LINKER_FLAGS}) + endif() +endmacro() + function(get_project_name_from_src_var var output) string(REGEX MATCH "LLVM_EXTERNAL_(.*)_SOURCE_DIR" MACHED_TOOL "${var}") diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index c70b6b8206c2f..c76cb8a789bee 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -428,6 +428,9 @@ enabled sub-projects. Nearly all of these variable names begin with $CMAKE_INSTALL_PREFIX/Toolchains containing an xctoolchain directory which can be used to override the default system tools. +**LLVM__LINKER_FLAGS**:STRING + Defines the set of linker flags that should be applied to a . + **LLVM_DEFAULT_TARGET_TRIPLE**:STRING LLVM target to use for code generation when no target is explicitly specified. 
It defaults to "host", meaning that it shall pick the architecture From ef0e0adccd94ffdb10546491ef2719669754d3c9 Mon Sep 17 00:00:00 2001 From: William Junda Huang Date: Tue, 17 Oct 2023 17:09:39 -0400 Subject: [PATCH 388/720] [llvm-profdata] Do not create numerical strings for MD5 function names read from a Sample Profile. (#66164) This is phase 2 of the MD5 refactoring on Sample Profile following https://reviews.llvm.org/D147740 In previous implementation, when a MD5 Sample Profile is read, the reader first converts the MD5 values to strings, and then create a StringRef as if the numerical strings are regular function names, and later on IPO transformation passes perform string comparison over these numerical strings for profile matching. This is inefficient since it causes many small heap allocations. In this patch I created a class `ProfileFuncRef` that is similar to `StringRef` but it can represent a hash value directly without any conversion, and it will be more efficient (I will attach some benchmark results later) when being used in associative containers. ProfileFuncRef guarantees the same function name in string form or in MD5 form has the same hash value, which also fix a few issue in IPO passes where function matching/lookup only check for function name string, while returns a no-match if the profile is MD5. 
When testing on an internal large profile (> 1 GB, with more than 10 million functions), the full profile load time is reduced from 28 sec to 25 sec in average, and reading function offset table from 0.78s to 0.7s --- llvm/include/llvm/ProfileData/FunctionId.h | 213 +++++++++++++ llvm/include/llvm/ProfileData/HashKeyMap.h | 129 ++++++++ llvm/include/llvm/ProfileData/SampleProf.h | 283 +++++++----------- .../llvm/ProfileData/SampleProfReader.h | 30 +- .../llvm/ProfileData/SampleProfWriter.h | 16 +- .../llvm/Transforms/IPO/ProfiledCallGraph.h | 33 +- .../Transforms/IPO/SampleContextTracker.h | 29 +- llvm/lib/ProfileData/SampleProf.cpp | 42 +-- llvm/lib/ProfileData/SampleProfReader.cpp | 98 +++--- llvm/lib/ProfileData/SampleProfWriter.cpp | 45 +-- llvm/lib/Target/X86/X86InsertPrefetch.cpp | 7 +- .../Transforms/IPO/SampleContextTracker.cpp | 67 ++--- llvm/lib/Transforms/IPO/SampleProfile.cpp | 149 +++++---- llvm/tools/llvm-profdata/llvm-profdata.cpp | 25 +- llvm/tools/llvm-profgen/CSPreInliner.cpp | 10 +- llvm/tools/llvm-profgen/CSPreInliner.h | 7 +- llvm/tools/llvm-profgen/CallContext.h | 2 +- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 33 +- llvm/tools/llvm-profgen/ProfileGenerator.h | 2 +- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 15 +- llvm/tools/llvm-profgen/ProfiledBinary.h | 22 +- llvm/unittests/ProfileData/SampleProfTest.cpp | 53 ++-- 22 files changed, 797 insertions(+), 513 deletions(-) create mode 100644 llvm/include/llvm/ProfileData/FunctionId.h create mode 100644 llvm/include/llvm/ProfileData/HashKeyMap.h diff --git a/llvm/include/llvm/ProfileData/FunctionId.h b/llvm/include/llvm/ProfileData/FunctionId.h new file mode 100644 index 0000000000000..0076cdc090459 --- /dev/null +++ b/llvm/include/llvm/ProfileData/FunctionId.h @@ -0,0 +1,213 @@ +//===--- FunctionId.h - Sample profile function object ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Defines FunctionId class. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_FUNCTIONID_H +#define LLVM_PROFILEDATA_FUNCTIONID_H + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { +namespace sampleprof { + +/// This class represents a function that is read from a sample profile. It +/// comes with two forms: a string or a hash code. The latter form is the 64-bit +/// MD5 of the function name for efficient storage supported by ExtBinary +/// profile format, and when reading the profile, this class can represent it +/// without converting it to a string first. +/// When representing a hash code, we utilize the LengthOrHashCode field to +/// store it, and Name is set to null. When representing a string, it is same as +/// StringRef. +class FunctionId { + + const char *Data = nullptr; + + // Use uint64_t instead of size_t so that it can also hold a MD5 value on + // 32-bit system. + uint64_t LengthOrHashCode = 0; + + /// Extension to memcmp to handle hash code representation. If both are hash + /// values, Lhs and Rhs are both null, function returns 0 (and needs an extra + /// comparison using getIntValue). If only one is hash code, it is considered + /// less than the StringRef one. Otherwise perform normal string comparison. + static int compareMemory(const char *Lhs, const char *Rhs, uint64_t Length) { + if (Lhs == Rhs) + return 0; + if (!Lhs) + return -1; + if (!Rhs) + return 1; + return ::memcmp(Lhs, Rhs, (size_t)Length); + } + +public: + FunctionId() = default; + + /// Constructor from a StringRef. 
+ explicit FunctionId(StringRef Str) + : Data(Str.data()), LengthOrHashCode(Str.size()) { + } + + /// Constructor from a hash code. + explicit FunctionId(uint64_t HashCode) + : LengthOrHashCode(HashCode) { + assert(HashCode != 0); + } + + /// Check for equality. Similar to StringRef::equals, but will also cover for + /// the case where one or both are hash codes. Comparing their int values are + /// sufficient. A hash code FunctionId is considered not equal to a StringRef + /// FunctionId regardless of actual contents. + bool equals(const FunctionId &Other) const { + return LengthOrHashCode == Other.LengthOrHashCode && + compareMemory(Data, Other.Data, LengthOrHashCode) == 0; + } + + /// Total order comparison. If both FunctionId are StringRef, this is the same + /// as StringRef::compare. If one of them is StringRef, it is considered + /// greater than the hash code FunctionId. Otherwise this is the the same + /// as comparing their int values. + int compare(const FunctionId &Other) const { + auto Res = compareMemory( + Data, Other.Data, std::min(LengthOrHashCode, Other.LengthOrHashCode)); + if (Res != 0) + return Res; + if (LengthOrHashCode == Other.LengthOrHashCode) + return 0; + return LengthOrHashCode < Other.LengthOrHashCode ? -1 : 1; + } + + /// Convert to a string, usually for output purpose. Use caution on return + /// value's lifetime when converting to StringRef. + std::string str() const { + if (Data) + return std::string(Data, LengthOrHashCode); + if (LengthOrHashCode != 0) + return std::to_string(LengthOrHashCode); + return std::string(); + } + + /// Convert to StringRef. This is only allowed when it is known this object is + /// representing a StringRef, not a hash code. Calling this function on a hash + /// code is considered an error. 
+ StringRef stringRef() const { + if (Data) + return StringRef(Data, LengthOrHashCode); + assert(LengthOrHashCode == 0 && + "Cannot convert MD5 FunctionId to StringRef"); + return StringRef(); + } + + friend raw_ostream &operator<<(raw_ostream &OS, const FunctionId &Obj); + + /// Get hash code of this object. Returns this object's hash code if it is + /// already representing one, otherwise returns the MD5 of its string content. + /// Note that it is not the same as std::hash because we want to keep the + /// consistency that the same sample profile function in string form or MD5 + /// form has the same hash code. + uint64_t getHashCode() const { + if (Data) + return MD5Hash(StringRef(Data, LengthOrHashCode)); + return LengthOrHashCode; + } + + bool empty() const { return LengthOrHashCode == 0; } + + /// Check if this object represents a StringRef, or a hash code. + bool isStringRef() const { return Data != nullptr; } +}; + +inline bool operator==(const FunctionId &LHS, const FunctionId &RHS) { + return LHS.equals(RHS); +} + +inline bool operator!=(const FunctionId &LHS, const FunctionId &RHS) { + return !LHS.equals(RHS); +} + +inline bool operator<(const FunctionId &LHS, const FunctionId &RHS) { + return LHS.compare(RHS) < 0; +} + +inline bool operator<=(const FunctionId &LHS, const FunctionId &RHS) { + return LHS.compare(RHS) <= 0; +} + +inline bool operator>(const FunctionId &LHS, const FunctionId &RHS) { + return LHS.compare(RHS) > 0; +} + +inline bool operator>=(const FunctionId &LHS, const FunctionId &RHS) { + return LHS.compare(RHS) >= 0; +} + +inline raw_ostream &operator<<(raw_ostream &OS, const FunctionId &Obj) { + if (Obj.Data) + return OS << StringRef(Obj.Data, Obj.LengthOrHashCode); + if (Obj.LengthOrHashCode != 0) + return OS << Obj.LengthOrHashCode; + return OS; +} + +inline uint64_t MD5Hash(const FunctionId &Obj) { + return Obj.getHashCode(); +} + +inline uint64_t hash_value(const FunctionId &Obj) { + return Obj.getHashCode(); +} + +} // end 
namespace sampleprof + +/// Template specialization for FunctionId so that it can be used in LLVM map +/// containers. +template <> struct DenseMapInfo { + + static inline sampleprof::FunctionId getEmptyKey() { + return sampleprof::FunctionId(~0ULL); + } + + static inline sampleprof::FunctionId getTombstoneKey() { + return sampleprof::FunctionId(~1ULL); + } + + static unsigned getHashValue(const sampleprof::FunctionId &Val) { + return Val.getHashCode(); + } + + static bool isEqual(const sampleprof::FunctionId &LHS, + const sampleprof::FunctionId &RHS) { + return LHS == RHS; + } +}; + +} // end namespace llvm + +namespace std { + +/// Template specialization for FunctionId so that it can be used in STL +/// containers. +template <> struct hash { + size_t operator()(const llvm::sampleprof::FunctionId &Val) const { + return Val.getHashCode(); + } +}; + +} // end namespace std + +#endif // LLVM_PROFILEDATA_FUNCTIONID_H diff --git a/llvm/include/llvm/ProfileData/HashKeyMap.h b/llvm/include/llvm/ProfileData/HashKeyMap.h new file mode 100644 index 0000000000000..b2f1bf222157b --- /dev/null +++ b/llvm/include/llvm/ProfileData/HashKeyMap.h @@ -0,0 +1,129 @@ +//===--- HashKeyMap.h - Wrapper for maps using hash value key ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Defines HashKeyMap template. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_HASHKEYMAP_H +#define LLVM_PROFILEDATA_HASHKEYMAP_H + +#include "llvm/ADT/Hashing.h" +#include +#include + +namespace llvm { + +namespace sampleprof { + +/// This class is a wrapper to associative container MapT using +/// the hash value of the original key as the new key. 
This greatly improves the +/// performance of insert and query operations especially when hash values of +/// keys are available a priori, and reduces memory usage if KeyT has a large +/// size. +/// All keys with the same hash value are considered equivalent (i.e. hash +/// collision is silently ignored). Given such feature this class should only be +/// used where it does not affect compilation correctness, for example, when +/// loading a sample profile. The original key is not stored, so if the user +/// needs to preserve it, it should be stored in the mapped type. +/// Assuming the hashing algorithm is uniform, we use the formula +/// 1 - Permute(n, k) / n ^ k where n is the universe size and k is number of +/// elements chosen at random to calculate the probability of collision. With +/// 1,000,000 entries the probability is negligible: +/// 1 - (2^64)!/((2^64-1000000)!*(2^64)^1000000) ~= 3*10^-8. +/// Source: https://en.wikipedia.org/wiki/Birthday_problem +/// +/// \param MapT The underlying associative container type. +/// \param KeyT The original key type, which requires the implementation of +/// llvm::hash_value(KeyT). +/// \param ValueT The original mapped type, which has the same requirement as +/// the underlying container. +/// \param MapTArgs Additional template parameters passed to the underlying +/// container. +template