diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index 93e673ca513a4..5d4394435890a 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -255,11 +255,11 @@ jobs: - name: Install a current LLVM if: ${{ matrix.mingw != true }} run: | - choco install -y llvm --version=18.1.6 --allow-downgrade + choco install -y llvm --version=19.1.7 --allow-downgrade - name: Install llvm-mingw if: ${{ matrix.mingw == true }} run: | - curl -LO https://github.com/mstorsjo/llvm-mingw/releases/download/20240606/llvm-mingw-20240606-ucrt-x86_64.zip + curl -LO https://github.com/mstorsjo/llvm-mingw/releases/download/20250114/llvm-mingw-20250114-ucrt-x86_64.zip powershell Expand-Archive llvm-mingw*.zip -DestinationPath . del llvm-mingw*.zip mv llvm-mingw* c:\llvm-mingw diff --git a/bolt/include/bolt/Passes/ContinuityStats.h b/bolt/include/bolt/Passes/ContinuityStats.h deleted file mode 100644 index bd4d491ad4a55..0000000000000 --- a/bolt/include/bolt/Passes/ContinuityStats.h +++ /dev/null @@ -1,61 +0,0 @@ -//===- bolt/Passes/ContinuityStats.h ----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass checks how well the BOLT input profile satisfies the following -// "CFG continuity" property of a perfect profile: -// -// Each positive-execution-count block in the function’s CFG -// should be *reachable* from a positive-execution-count function -// entry block through a positive-execution-count path. -// -// More specifically, for each of the hottest 1000 functions, the pass -// calculates the function’s fraction of basic block execution counts -// that is *unreachable*. 
It then reports the 95th percentile of the -// distribution of the 1000 unreachable fractions in a single BOLT-INFO line. -// The smaller the reported value is, the better the BOLT profile -// satisfies the CFG continuity property. - -// The default value of 1000 above can be changed via the hidden BOLT option -// `-num-functions-for-continuity-check=[N]`. -// If more detailed stats are needed, `-v=1` can be used: the hottest N -// functions will be grouped into 5 equally-sized buckets, from the hottest -// to the coldest; for each bucket, various summary statistics of the -// distribution of the unreachable fractions and the raw unreachable execution -// counts will be reported. -// -//===----------------------------------------------------------------------===// - -#ifndef BOLT_PASSES_CONTINUITYSTATS_H -#define BOLT_PASSES_CONTINUITYSTATS_H - -#include "bolt/Passes/BinaryPasses.h" -#include - -namespace llvm { - -class raw_ostream; - -namespace bolt { -class BinaryContext; - -/// Compute and report to the user the function CFG continuity quality -class PrintContinuityStats : public BinaryFunctionPass { -public: - explicit PrintContinuityStats(const cl::opt &PrintPass) - : BinaryFunctionPass(PrintPass) {} - - bool shouldOptimize(const BinaryFunction &BF) const override; - const char *getName() const override { return "continuity-stats"; } - bool shouldPrint(const BinaryFunction &) const override { return false; } - Error runOnFunctions(BinaryContext &BC) override; -}; - -} // namespace bolt -} // namespace llvm - -#endif // BOLT_PASSES_CONTINUITYSTATS_H diff --git a/bolt/include/bolt/Passes/ProfileQualityStats.h b/bolt/include/bolt/Passes/ProfileQualityStats.h new file mode 100644 index 0000000000000..86fc88cefc10e --- /dev/null +++ b/bolt/include/bolt/Passes/ProfileQualityStats.h @@ -0,0 +1,98 @@ +//===- bolt/Passes/ProfileQualityStats.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass checks the BOLT input profile quality. +// +// Check 1: how well the input profile satisfies the following +// "CFG continuity" property of a perfect profile: +// +// Each positive-execution-count block in the function’s CFG +// is *reachable* from a positive-execution-count function +// entry block through a positive-execution-count path. +// +// More specifically, for each of the hottest 1000 functions, the pass +// calculates the function’s fraction of basic block execution counts +// that is *unreachable*. It then reports the 95th percentile of the +// distribution of the 1000 unreachable fractions in a single BOLT-INFO line. +// The smaller the reported value is, the better the BOLT profile +// satisfies the CFG continuity property. +// +// Check 2: how well the input profile satisfies the "call graph flow +// conservation" property of a perfect profile: +// +// For each function that is not a program entry, the number of times the +// function is called is equal to the net CFG outflow of the +// function's entry block(s). +// +// More specifically, for each of the hottest 1000 functions, the pass obtains +// A = number of times the function is called, B = the function's entry blocks' +// inflow, C = the function's entry blocks' outflow, where B and C are computed +// using the function's weighted CFG. It then computes gap = 1 - MIN(A,C-B) / +// MAX(A, C-B). The pass reports the 95th percentile of the distribution of the +// 1000 gaps in a single BOLT-INFO line. The smaller the reported value is, the +// better the BOLT profile satisfies the call graph flow conservation property. 
+// +// Check 3: how well the input profile satisfies the "function CFG flow +// conservation property" of a perfect profile: +// +// A non-entry non-exit basic block's inflow is equal to its outflow. +// +// More specifically, for each of the hottest 1000 functions, the pass loops +// over its basic blocks that are non-entry and non-exit, and for each block +// obtains a block gap = 1 - MIN(block inflow, block outflow, block call count +// if any) / MAX(block inflow, block outflow, block call count if any). It then +// aggregates the block gaps into 2 values for the function: "weighted" is the +// weighted average of the block conservation gaps, where the weights depend on +// each block's execution count and instruction count; "worst" is the worst +// (biggest) block gap across all basic blocks in the function with an execution +// count > 500. The pass then reports the 95th percentile of the weighted and +// worst values of the 1000 functions in a single BOLT-INFO line. The smaller +// the reported values are, the better the BOLT profile satisfies the function +// CFG flow conservation property. +// +// The default value of 1000 above can be changed via the hidden BOLT option +// `-top-functions-for-profile-quality-check=[N]`. +// The default reporting of the 95th percentile can be changed via the hidden +// BOLT option `-percentile-for-profile-quality-check=[M]`. +// +// If more detailed stats are needed, `-v=1` can be used: the hottest N +// functions will be grouped into 5 equally-sized buckets, from the hottest +// to the coldest; for each bucket, various summary statistics of the +// profile quality will be reported. 
+// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_PASSES_PROFILEQUALITYSTATS_H +#define BOLT_PASSES_PROFILEQUALITYSTATS_H + +#include "bolt/Passes/BinaryPasses.h" +#include + +namespace llvm { + +class raw_ostream; + +namespace bolt { +class BinaryContext; + +/// Compute and report to the user the profile quality +class PrintProfileQualityStats : public BinaryFunctionPass { +public: + explicit PrintProfileQualityStats(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) {} + + bool shouldOptimize(const BinaryFunction &BF) const override; + const char *getName() const override { return "profile-quality-stats"; } + bool shouldPrint(const BinaryFunction &) const override { return false; } + Error runOnFunctions(BinaryContext &BC) override; +}; + +} // namespace bolt +} // namespace llvm + +#endif // BOLT_PASSES_PROFILEQUALITYSTATS_H diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h index 42094cb732107..fdd65bbd535f7 100644 --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -505,6 +505,9 @@ class RewriteInstance { /// Number of local symbols in newly written symbol table. uint64_t NumLocalSymbols{0}; + /// Flag indicating runtime library linking just started. + bool StartLinkingRuntimeLib{false}; + /// Information on special Procedure Linkage Table sections. There are /// multiple variants generated by different linkers. 
struct PLTSectionInfo { diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt index adc91658050a6..3864255a09ebe 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -27,7 +27,7 @@ add_llvm_library(LLVMBOLTPasses PatchEntries.cpp PettisAndHansen.cpp PLTCall.cpp - ContinuityStats.cpp + ProfileQualityStats.cpp RegAnalysis.cpp RegReAssign.cpp ReorderAlgorithm.cpp diff --git a/bolt/lib/Passes/ContinuityStats.cpp b/bolt/lib/Passes/ContinuityStats.cpp deleted file mode 100644 index b32365b59065d..0000000000000 --- a/bolt/lib/Passes/ContinuityStats.cpp +++ /dev/null @@ -1,250 +0,0 @@ -//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the continuity stats calculation pass. 
-// -//===----------------------------------------------------------------------===// - -#include "bolt/Passes/ContinuityStats.h" -#include "bolt/Core/BinaryBasicBlock.h" -#include "bolt/Core/BinaryFunction.h" -#include "bolt/Utils/CommandLineOpts.h" -#include "llvm/Support/CommandLine.h" -#include -#include -#include - -#define DEBUG_TYPE "bolt-opts" - -using namespace llvm; -using namespace bolt; - -namespace opts { -extern cl::opt Verbosity; -cl::opt NumFunctionsForContinuityCheck( - "num-functions-for-continuity-check", - cl::desc("number of hottest functions to print aggregated " - "CFG discontinuity stats of."), - cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); -} // namespace opts - -namespace { -using FunctionListType = std::vector; -using function_iterator = FunctionListType::iterator; - -template -void printDistribution(raw_ostream &OS, std::vector &values, - bool Fraction = false) { - if (values.empty()) - return; - // Sort values from largest to smallest and print the MAX, TOP 1%, 5%, 10%, - // 20%, 50%, 80%, MIN. If Fraction is true, then values are printed as - // fractions instead of integers. 
- std::sort(values.begin(), values.end()); - - auto printLine = [&](std::string Text, double Percent) { - int Rank = int(values.size() * (1.0 - Percent / 100)); - if (Percent == 0) - Rank = values.size() - 1; - if (Fraction) - OS << " " << Text << std::string(9 - Text.length(), ' ') << ": " - << format("%.2lf%%", values[Rank] * 100) << "\n"; - else - OS << " " << Text << std::string(9 - Text.length(), ' ') << ": " - << values[Rank] << "\n"; - }; - - printLine("MAX", 0); - const int percentages[] = {1, 5, 10, 20, 50, 80}; - for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) { - printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]); - } - printLine("MIN", 100); -} - -void printCFGContinuityStats(raw_ostream &OS, - iterator_range &Functions) { - // Given a perfect profile, every positive-execution-count BB should be - // connected to an entry of the function through a positive-execution-count - // directed path in the control flow graph. - std::vector NumUnreachables; - std::vector SumECUnreachables; - std::vector FractionECUnreachables; - - for (auto it = Functions.begin(); it != Functions.end(); ++it) { - const BinaryFunction *Function = *it; - if (Function->size() <= 1) - continue; - - // Compute the sum of all BB execution counts (ECs). - size_t NumPosECBBs = 0; - size_t SumAllBBEC = 0; - for (const BinaryBasicBlock &BB : *Function) { - const size_t BBEC = BB.getKnownExecutionCount(); - NumPosECBBs += BBEC > 0 ? 1 : 0; - SumAllBBEC += BBEC; - } - - // Perform BFS on subgraph of CFG induced by positive weight edges. - // Compute the number of BBs reachable from the entry(s) of the function and - // the sum of their execution counts (ECs). - std::unordered_map IndexToBB; - std::unordered_set Visited; - std::queue Queue; - for (const BinaryBasicBlock &BB : *Function) { - // Make sure BB.getIndex() is not already in IndexToBB. 
- assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end()); - IndexToBB[BB.getIndex()] = &BB; - if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) { - Queue.push(BB.getIndex()); - Visited.insert(BB.getIndex()); - } - } - while (!Queue.empty()) { - const unsigned BBIndex = Queue.front(); - const BinaryBasicBlock *BB = IndexToBB[BBIndex]; - Queue.pop(); - auto SuccBIIter = BB->branch_info_begin(); - for (const BinaryBasicBlock *Succ : BB->successors()) { - const uint64_t Count = SuccBIIter->Count; - if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) { - ++SuccBIIter; - continue; - } - if (!Visited.insert(Succ->getIndex()).second) { - ++SuccBIIter; - continue; - } - Queue.push(Succ->getIndex()); - ++SuccBIIter; - } - } - - const size_t NumReachableBBs = Visited.size(); - - // Loop through Visited, and sum the corresponding BBs' execution counts - // (ECs). - size_t SumReachableBBEC = 0; - for (const unsigned BBIndex : Visited) { - const BinaryBasicBlock *BB = IndexToBB[BBIndex]; - SumReachableBBEC += BB->getKnownExecutionCount(); - } - - const size_t NumPosECBBsUnreachableFromEntry = - NumPosECBBs - NumReachableBBs; - const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC; - const double FractionECUnreachable = - (double)SumUnreachableBBEC / SumAllBBEC; - - if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) { - OS << "Non-trivial CFG discontinuity observed in function " - << Function->getPrintName() << "\n"; - LLVM_DEBUG(Function->dump()); - } - - NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry); - SumECUnreachables.push_back(SumUnreachableBBEC); - FractionECUnreachables.push_back(FractionECUnreachable); - } - - if (FractionECUnreachables.empty()) - return; - - std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end()); - const int Rank = int(FractionECUnreachables.size() * 0.95); - OS << format("top 5%% function CFG discontinuity is %.2lf%%\n", - FractionECUnreachables[Rank] * 100); - - if (opts::Verbosity >= 
1) { - OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n" - << "distribution of NUM(unreachable POS BBs) among all focal " - "functions\n"; - printDistribution(OS, NumUnreachables); - - OS << "distribution of SUM_EC(unreachable POS BBs) among all focal " - "functions\n"; - printDistribution(OS, SumECUnreachables); - - OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all " - "POS BBs))] among all focal functions\n"; - printDistribution(OS, FractionECUnreachables, /*Fraction=*/true); - } -} - -void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, - size_t NumTopFunctions) { - // Sort the list of functions by execution counts (reverse). - llvm::sort(ValidFunctions, - [&](const BinaryFunction *A, const BinaryFunction *B) { - return A->getKnownExecutionCount() > B->getKnownExecutionCount(); - }); - - const size_t RealNumTopFunctions = - std::min(NumTopFunctions, ValidFunctions.size()); - - iterator_range Functions( - ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions); - - BC.outs() << format("BOLT-INFO: among the hottest %zu functions ", - RealNumTopFunctions); - printCFGContinuityStats(BC.outs(), Functions); - - // Print more detailed bucketed stats if requested. - if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) { - const size_t PerBucketSize = RealNumTopFunctions / 5; - BC.outs() << format( - "Detailed stats for 5 buckets, each with %zu functions:\n", - PerBucketSize); - - // For each bucket, print the CFG continuity stats of the functions in the - // bucket. 
- for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) { - const size_t StartIndex = BucketIndex * PerBucketSize; - const size_t EndIndex = StartIndex + PerBucketSize; - iterator_range Functions( - ValidFunctions.begin() + StartIndex, - ValidFunctions.begin() + EndIndex); - const size_t MaxFunctionExecutionCount = - ValidFunctions[StartIndex]->getKnownExecutionCount(); - const size_t MinFunctionExecutionCount = - ValidFunctions[EndIndex - 1]->getKnownExecutionCount(); - BC.outs() << format("----------------\n| Bucket %zu: " - "|\n----------------\n", - BucketIndex + 1) - << format( - "execution counts of the %zu functions in the bucket: " - "%zu-%zu\n", - EndIndex - StartIndex, MinFunctionExecutionCount, - MaxFunctionExecutionCount); - printCFGContinuityStats(BC.outs(), Functions); - } - } -} -} // namespace - -bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const { - if (BF.empty() || !BF.hasValidProfile()) - return false; - - return BinaryFunctionPass::shouldOptimize(BF); -} - -Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) { - // Create a list of functions with valid profiles. 
- FunctionListType ValidFunctions; - for (const auto &BFI : BC.getBinaryFunctions()) { - const BinaryFunction *Function = &BFI.second; - if (PrintContinuityStats::shouldOptimize(*Function)) - ValidFunctions.push_back(Function); - } - if (ValidFunctions.empty() || opts::NumFunctionsForContinuityCheck == 0) - return Error::success(); - - printAll(BC, ValidFunctions, opts::NumFunctionsForContinuityCheck); - return Error::success(); -} diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp index 76766b05b9176..fbf889279f1c0 100644 --- a/bolt/lib/Passes/Instrumentation.cpp +++ b/bolt/lib/Passes/Instrumentation.cpp @@ -604,7 +604,7 @@ Error Instrumentation::runOnFunctions(BinaryContext &BC) { /*IsText=*/false, /*IsAllocatable=*/true); BC.registerOrUpdateSection(".bolt.instr.counters", ELF::SHT_PROGBITS, Flags, - nullptr, 0, 1); + nullptr, 0, BC.RegularPageSize); BC.registerOrUpdateNoteSection(".bolt.instr.tables", nullptr, 0, /*Alignment=*/1, diff --git a/bolt/lib/Passes/ProfileQualityStats.cpp b/bolt/lib/Passes/ProfileQualityStats.cpp new file mode 100644 index 0000000000000..78e6412f56ba1 --- /dev/null +++ b/bolt/lib/Passes/ProfileQualityStats.cpp @@ -0,0 +1,579 @@ +//===- bolt/Passes/ProfileQualityStats.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the profile quality stats calculation pass. 
+// +//===----------------------------------------------------------------------===// + +#include "bolt/Passes/ProfileQualityStats.h" +#include "bolt/Core/BinaryBasicBlock.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Utils/CommandLineOpts.h" +#include "llvm/Support/CommandLine.h" +#include +#include +#include + +using namespace llvm; +using namespace bolt; + +namespace opts { +extern cl::opt Verbosity; +cl::opt TopFunctionsForProfileQualityCheck( + "top-functions-for-profile-quality-check", + cl::desc("number of hottest functions to print aggregated " + "profile quality stats of."), + cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); +cl::opt PercentileForProfileQualityCheck( + "percentile-for-profile-quality-check", + cl::desc("Percentile of profile quality distributions over hottest " + "functions to report."), + cl::init(95), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); +} // namespace opts + +namespace { +using FunctionListType = std::vector; +using function_iterator = FunctionListType::iterator; + +// Function number -> vector of flows for BBs in the function +using TotalFlowMapTy = std::unordered_map>; +// Function number -> flow count +using FunctionFlowMapTy = std::unordered_map; +struct FlowInfo { + TotalFlowMapTy TotalIncomingFlows; + TotalFlowMapTy TotalOutgoingFlows; + TotalFlowMapTy TotalMaxCountMaps; + TotalFlowMapTy TotalMinCountMaps; + FunctionFlowMapTy CallGraphIncomingFlows; +}; + +template +void printDistribution(raw_ostream &OS, std::vector &values, + bool Fraction = false) { + // Assume values are sorted. 
+ if (values.empty()) + return; + + OS << " Length : " << values.size() << "\n"; + + auto printLine = [&](std::string Text, double Percent) { + int Rank = int(values.size() * (100 - Percent) / 100); + if (Percent == 0) + Rank = values.size() - 1; + if (Fraction) + OS << " " << Text << std::string(11 - Text.length(), ' ') << ": " + << formatv("{0:P}", values[Rank]) << "\n"; + else + OS << " " << Text << std::string(11 - Text.length(), ' ') << ": " + << values[Rank] << "\n"; + }; + + printLine("MAX", 0); + const int percentages[] = {1, 5, 10, 20, 50, 80}; + for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) { + printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]); + } + printLine("MIN", 100); +} + +/// Print how well the profile of \p Functions satisfies CFG continuity. For +/// each function with more than one block, compute the number and the summed +/// execution count of blocks that cannot be reached from a positive-count +/// entry block over positive-count edges. The configured percentile of the +/// per-function unreachable fraction is written to \p OS; the full +/// distributions follow when verbosity is >= 1. +void printCFGContinuityStats(raw_ostream &OS, + iterator_range &Functions) { + // Given a perfect profile, every positive-execution-count BB should be + // connected to an entry of the function through a positive-execution-count + // directed path in the control flow graph. + std::vector NumUnreachables; + std::vector SumECUnreachables; + std::vector FractionECUnreachables; + + for (const BinaryFunction *Function : Functions) { + if (Function->size() <= 1) + continue; + + // Compute the sum of all BB execution counts (ECs). + size_t NumPosECBBs = 0; + size_t SumAllBBEC = 0; + for (const BinaryBasicBlock &BB : *Function) { + const size_t BBEC = BB.getKnownExecutionCount(); + NumPosECBBs += !!BBEC; + SumAllBBEC += BBEC; + } + + // Perform BFS on subgraph of CFG induced by positive weight edges. + // Compute the number of BBs reachable from the entry(s) of the function and + // the sum of their execution counts (ECs). 
+ std::unordered_set Visited; + std::queue Queue; + size_t SumReachableBBEC = 0; + + Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) { + const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset); + if (!EntryBB || EntryBB->getKnownExecutionCount() == 0) + return true; + Queue.push(EntryBB->getLayoutIndex()); + Visited.insert(EntryBB->getLayoutIndex()); + SumReachableBBEC += EntryBB->getKnownExecutionCount(); + return true; + }); + + const FunctionLayout &Layout = Function->getLayout(); + + while (!Queue.empty()) { + const unsigned BBIndex = Queue.front(); + const BinaryBasicBlock *BB = Layout.getBlock(BBIndex); + Queue.pop(); + for (const auto &[Succ, BI] : + llvm::zip(BB->successors(), BB->branch_info())) { + const uint64_t Count = BI.Count; + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0 || + !Visited.insert(Succ->getLayoutIndex()).second) + continue; + SumReachableBBEC += Succ->getKnownExecutionCount(); + Queue.push(Succ->getLayoutIndex()); + } + } + + // Count only the visited blocks that have a positive execution count. A + // zero-count block can be visited over a positive-count edge, and counting + // it could make the unsigned subtraction below wrap around. + size_t NumReachableBBs = 0; + for (const auto VisitedIndex : Visited) + NumReachableBBs += + !!Layout.getBlock(VisitedIndex)->getKnownExecutionCount(); + + const size_t NumPosECBBsUnreachableFromEntry = + NumPosECBBs - NumReachableBBs; + const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC; + // Guard the division: when every block has a zero count the quotient would + // be 0/0 = NaN, which also breaks the ordering required by llvm::sort. + const double FractionECUnreachable = + SumAllBBEC ? (double)SumUnreachableBBEC / SumAllBBEC : 0.0; + + if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) { + OS << "Non-trivial CFG discontinuity observed in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + + NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry); + SumECUnreachables.push_back(SumUnreachableBBEC); + FractionECUnreachables.push_back(FractionECUnreachable); + } + + if (FractionECUnreachables.empty()) + return; + + llvm::sort(FractionECUnreachables); + // Clamp the rank so that a percentile of 100 or more selects the largest + // element instead of indexing one past the end of the vector. + const size_t Rank = + std::min<size_t>(FractionECUnreachables.size() * + opts::PercentileForProfileQualityCheck / 100, + FractionECUnreachables.size() - 1); + OS << formatv("function CFG discontinuity {0:P}; ", + FractionECUnreachables[Rank]); + if 
(opts::Verbosity >= 1) { + OS << "\nabbreviations: EC = execution count, POS BBs = positive EC BBs\n" + << "distribution of NUM(unreachable POS BBs) per function\n"; + llvm::sort(NumUnreachables); + printDistribution(OS, NumUnreachables); + + OS << "distribution of SUM_EC(unreachable POS BBs) per function\n"; + llvm::sort(SumECUnreachables); + printDistribution(OS, SumECUnreachables); + + OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all " + "POS BBs))] per function\n"; + printDistribution(OS, FractionECUnreachables, /*Fraction=*/true); + } +} + +void printCallGraphFlowConservationStats( + raw_ostream &OS, iterator_range &Functions, + FlowInfo &TotalFlowMap) { + std::vector CallGraphGaps; + + for (const BinaryFunction *Function : Functions) { + if (Function->size() <= 1 || !Function->isSimple()) + continue; + + const uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &IncomingFlows = + TotalFlowMap.TotalIncomingFlows[FunctionNum]; + std::vector &OutgoingFlows = + TotalFlowMap.TotalOutgoingFlows[FunctionNum]; + FunctionFlowMapTy &CallGraphIncomingFlows = + TotalFlowMap.CallGraphIncomingFlows; + + // Only consider functions that are not a program entry. + if (CallGraphIncomingFlows.find(FunctionNum) != + CallGraphIncomingFlows.end()) { + uint64_t EntryInflow = 0; + uint64_t EntryOutflow = 0; + uint32_t NumConsideredEntryBlocks = 0; + + Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) { + const BinaryBasicBlock *EntryBB = + Function->getBasicBlockAtOffset(Offset); + if (!EntryBB || EntryBB->succ_size() == 0) + return true; + NumConsideredEntryBlocks++; + EntryInflow += IncomingFlows[EntryBB->getLayoutIndex()]; + EntryOutflow += OutgoingFlows[EntryBB->getLayoutIndex()]; + return true; + }); + + uint64_t NetEntryOutflow = 0; + if (EntryOutflow < EntryInflow) { + if (opts::Verbosity >= 2) { + // We expect entry blocks' CFG outflow >= inflow, i.e., it has a + // non-negative net outflow. 
If this is not the case, then raise a + // warning if requested. + OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow " + "in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + } else { + NetEntryOutflow = EntryOutflow - EntryInflow; + } + if (NumConsideredEntryBlocks > 0) { + const uint64_t CallGraphInflow = + TotalFlowMap.CallGraphIncomingFlows[Function->getFunctionNumber()]; + const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow); + const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow); + // Both flows being zero means they agree exactly; report a zero gap + // rather than computing 0/0 = NaN. + const double CallGraphGap = Max ? 1 - (double)Min / Max : 0.0; + + if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) { + OS << "Nontrivial call graph gap of size " + << formatv("{0:P}", CallGraphGap) << " observed in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + + CallGraphGaps.push_back(CallGraphGap); + } + } + } + + if (CallGraphGaps.empty()) + return; + + llvm::sort(CallGraphGaps); + // Clamp the rank so that a percentile of 100 or more selects the largest + // element instead of indexing one past the end of the vector. + const size_t Rank = std::min<size_t>( + CallGraphGaps.size() * opts::PercentileForProfileQualityCheck / 100, + CallGraphGaps.size() - 1); + OS << formatv("call graph flow conservation gap {0:P}; ", + CallGraphGaps[Rank]); + if (opts::Verbosity >= 1) { + OS << "\ndistribution of function entry flow conservation gaps\n"; + printDistribution(OS, CallGraphGaps, /*Fraction=*/true); + } +} + +/// Print how well the profile of \p Functions satisfies CFG flow +/// conservation, i.e. that the inflow of a non-entry, non-exit block equals +/// its outflow. Block gaps are computed from the max/min count maps in +/// \p TotalFlowMap and aggregated per function into a log-weighted average +/// and a worst gap (over blocks with execution count > MinBlockCount). The +/// configured percentile of both values is written to \p OS; the full +/// distributions follow when verbosity is >= 1. +void printCFGFlowConservationStats(raw_ostream &OS, + iterator_range &Functions, + FlowInfo &TotalFlowMap) { + std::vector CFGGapsWeightedAvg; + std::vector CFGGapsWorst; + std::vector CFGGapsWorstAbs; + // We only consider blocks with execution counts > MinBlockCount when + // reporting the distribution of worst gaps. 
+ const uint16_t MinBlockCount = 500; + for (const BinaryFunction *Function : Functions) { + if (Function->size() <= 1 || !Function->isSimple()) + continue; + + const uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &MaxCountMaps = + TotalFlowMap.TotalMaxCountMaps[FunctionNum]; + std::vector &MinCountMaps = + TotalFlowMap.TotalMinCountMaps[FunctionNum]; + double WeightedGapSum = 0.0; + double WeightSum = 0.0; + double WorstGap = 0.0; + uint64_t WorstGapAbs = 0; + BinaryBasicBlock *BBWorstGap = nullptr; + BinaryBasicBlock *BBWorstGapAbs = nullptr; + for (BinaryBasicBlock &BB : *Function) { + // We don't consider function entry or exit blocks for CFG flow + // conservation + if (BB.isEntryPoint() || BB.succ_size() == 0) + continue; + + const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()]; + const uint64_t Min = MinCountMaps[BB.getLayoutIndex()]; + // A block with no recorded flow at all (Max == 0) is trivially balanced; + // avoid computing 0/0 = NaN. + const double Gap = Max ? 1 - (double)Min / Max : 0.0; + double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos(); + if (Weight == 0) + continue; + // We use log to prevent the stats from being dominated by extremely hot + // blocks + Weight = log(Weight); + WeightedGapSum += Gap * Weight; + WeightSum += Weight; + if (BB.getKnownExecutionCount() > MinBlockCount && Gap > WorstGap) { + WorstGap = Gap; + BBWorstGap = &BB; + } + if (BB.getKnownExecutionCount() > MinBlockCount && + Max - Min > WorstGapAbs) { + WorstGapAbs = Max - Min; + BBWorstGapAbs = &BB; + } + } + if (WeightSum > 0) { + const double WeightedGap = WeightedGapSum / WeightSum; + if (opts::Verbosity >= 2 && (WeightedGap >= 0.1 || WorstGap >= 0.9)) { + OS << "Nontrivial CFG gap observed in function " + << Function->getPrintName() << "\n" + << "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n"; + if (BBWorstGap) + OS << "Worst gap: " << formatv("{0:P}", WorstGap) + << " at BB with input offset: 0x" + << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n"; + if (BBWorstGapAbs) + OS << "Worst gap (absolute value): " << WorstGapAbs << " at 
BB with " + << "input offset 0x" + << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + + CFGGapsWeightedAvg.push_back(WeightedGap); + CFGGapsWorst.push_back(WorstGap); + CFGGapsWorstAbs.push_back(WorstGapAbs); + } + } + + if (CFGGapsWeightedAvg.empty()) + return; + llvm::sort(CFGGapsWeightedAvg); + // Clamp both ranks so that a percentile of 100 or more selects the largest + // element instead of indexing one past the end of the vector. + const size_t RankWA = + std::min<size_t>(CFGGapsWeightedAvg.size() * + opts::PercentileForProfileQualityCheck / 100, + CFGGapsWeightedAvg.size() - 1); + llvm::sort(CFGGapsWorst); + const size_t RankW = std::min<size_t>( + CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100, + CFGGapsWorst.size() - 1); + OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst)\n", + CFGGapsWeightedAvg[RankWA], CFGGapsWorst[RankW]); + if (opts::Verbosity >= 1) { + OS << "distribution of weighted CFG flow conservation gaps\n"; + printDistribution(OS, CFGGapsWeightedAvg, /*Fraction=*/true); + // MinBlockCount is a uint16_t, so it must not be printed through a + // printf-style "%zu" conversion; formatv is type-safe. + OS << formatv("Consider only blocks with execution counts > {0}:\n", + MinBlockCount) + << "distribution of worst block flow conservation gap per " + "function \n"; + printDistribution(OS, CFGGapsWorst, /*Fraction=*/true); + OS << "distribution of worst block flow conservation gap (absolute " + "value) per function\n"; + llvm::sort(CFGGapsWorstAbs); + printDistribution(OS, CFGGapsWorstAbs, /*Fraction=*/false); + } +} + +void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) { + // Increment block inflow and outflow with CFG jump counts. 
+ TotalFlowMapTy &TotalIncomingFlows = TotalFlowMap.TotalIncomingFlows; + TotalFlowMapTy &TotalOutgoingFlows = TotalFlowMap.TotalOutgoingFlows; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + std::vector &IncomingFlows = + TotalIncomingFlows[Function->getFunctionNumber()]; + std::vector &OutgoingFlows = + TotalOutgoingFlows[Function->getFunctionNumber()]; + const uint64_t NumBlocks = Function->size(); + IncomingFlows.resize(NumBlocks, 0); + OutgoingFlows.resize(NumBlocks, 0); + if (Function->empty() || !Function->hasValidProfile()) + continue; + for (const BinaryBasicBlock &BB : *Function) { + uint64_t TotalOutgoing = 0ULL; + for (const auto &[Succ, BI] : + llvm::zip(BB.successors(), BB.branch_info())) { + const uint64_t Count = BI.Count; + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) + continue; + TotalOutgoing += Count; + IncomingFlows[Succ->getLayoutIndex()] += Count; + } + OutgoingFlows[BB.getLayoutIndex()] = TotalOutgoing; + } + } + // Initialize TotalMaxCountMaps and TotalMinCountMaps using + // TotalIncomingFlows and TotalOutgoingFlows + TotalFlowMapTy &TotalMaxCountMaps = TotalFlowMap.TotalMaxCountMaps; + TotalFlowMapTy &TotalMinCountMaps = TotalFlowMap.TotalMinCountMaps; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &IncomingFlows = TotalIncomingFlows[FunctionNum]; + std::vector &OutgoingFlows = TotalOutgoingFlows[FunctionNum]; + std::vector &MaxCountMap = TotalMaxCountMaps[FunctionNum]; + std::vector &MinCountMap = TotalMinCountMaps[FunctionNum]; + const uint64_t NumBlocks = Function->size(); + MaxCountMap.resize(NumBlocks, 0); + MinCountMap.resize(NumBlocks, 0); + if (Function->empty() || !Function->hasValidProfile()) + continue; + for (const BinaryBasicBlock &BB : *Function) { + uint64_t BBNum = BB.getLayoutIndex(); + MaxCountMap[BBNum] = 
std::max(IncomingFlows[BBNum], OutgoingFlows[BBNum]); + MinCountMap[BBNum] = std::min(IncomingFlows[BBNum], OutgoingFlows[BBNum]); + } + } + + // Modify TotalMaxCountMaps and TotalMinCountMaps using call counts and + // fill out CallGraphIncomingFlows + FunctionFlowMapTy &CallGraphIncomingFlows = + TotalFlowMap.CallGraphIncomingFlows; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &MaxCountMap = TotalMaxCountMaps[FunctionNum]; + std::vector &MinCountMap = TotalMinCountMaps[FunctionNum]; + + // Update MaxCountMap, MinCountMap, and CallGraphIncomingFlows + auto recordCall = [&](const BinaryBasicBlock *SourceBB, + const MCSymbol *DestSymbol, uint64_t Count, + uint64_t TotalCallCount) { + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE) + Count = 0; + const BinaryFunction *DstFunc = + DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr; + if (DstFunc) + CallGraphIncomingFlows[DstFunc->getFunctionNumber()] += Count; + if (SourceBB) { + unsigned BlockIndex = SourceBB->getLayoutIndex(); + MaxCountMap[BlockIndex] = + std::max(MaxCountMap[BlockIndex], TotalCallCount); + MinCountMap[BlockIndex] = + std::min(MinCountMap[BlockIndex], TotalCallCount); + } + }; + + // Get pairs of (symbol, count) for each target at this callsite. + // If the call is to an unknown function the symbol will be nullptr. + // If there is no profiling data the count will be COUNT_NO_PROFILE. 
+ using TargetDesc = std::pair; + using CallInfoTy = std::vector; + auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) { + CallInfoTy Counts; + const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst); + + if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) { + for (const auto &CSI : BC.MIB->getAnnotationAs( + Inst, "CallProfile")) + if (CSI.Symbol) + Counts.emplace_back(CSI.Symbol, CSI.Count); + } else { + const uint64_t Count = BB->getExecutionCount(); + Counts.emplace_back(DstSym, Count); + } + + return Counts; + }; + + // If the function has an invalid profile, try to use the perf data + // directly. The call EC is only used to update CallGraphIncomingFlows. + if (!Function->hasValidProfile() && !Function->getAllCallSites().empty()) { + for (const IndirectCallProfile &CSI : Function->getAllCallSites()) + if (CSI.Symbol) + recordCall(nullptr, CSI.Symbol, CSI.Count, CSI.Count); + continue; + } else { + // If the function has a valid profile + for (const BinaryBasicBlock &BB : *Function) { + for (const MCInst &Inst : BB) { + if (!BC.MIB->isCall(Inst)) + continue; + // Find call instructions and extract target symbols from each + // one. + const CallInfoTy CallInfo = getCallInfo(&BB, Inst); + // We need the total call count to update MaxCountMap and + // MinCountMap in recordCall for indirect calls + uint64_t TotalCallCount = 0; + for (const TargetDesc &CI : CallInfo) + TotalCallCount += CI.second; + for (const TargetDesc &CI : CallInfo) + recordCall(&BB, CI.first, CI.second, TotalCallCount); + } + } + } + } +} + +void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, + size_t NumTopFunctions) { + // Sort the list of functions by execution counts (reverse). 
+ llvm::sort(ValidFunctions, + [&](const BinaryFunction *A, const BinaryFunction *B) { + return A->getKnownExecutionCount() > B->getKnownExecutionCount(); + }); + + const size_t RealNumTopFunctions = + std::min(NumTopFunctions, ValidFunctions.size()); + + iterator_range Functions( + ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions); + + FlowInfo TotalFlowMap; + computeFlowMappings(BC, TotalFlowMap); + + BC.outs() << format("BOLT-INFO: profile quality metrics for the hottest %zu " + "functions (reporting top %zu%% values): ", + RealNumTopFunctions, + 100 - opts::PercentileForProfileQualityCheck); + printCFGContinuityStats(BC.outs(), Functions); + printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + + // Print more detailed bucketed stats if requested. + if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) { + const size_t PerBucketSize = RealNumTopFunctions / 5; + BC.outs() << format( + "Detailed stats for 5 buckets, each with %zu functions:\n", + PerBucketSize); + + // For each bucket, print the CFG continuity stats of the functions in + // the bucket. 
+ for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) { + const size_t StartIndex = BucketIndex * PerBucketSize; + const size_t EndIndex = StartIndex + PerBucketSize; + iterator_range Functions( + ValidFunctions.begin() + StartIndex, + ValidFunctions.begin() + EndIndex); + const size_t MaxFunctionExecutionCount = + ValidFunctions[StartIndex]->getKnownExecutionCount(); + const size_t MinFunctionExecutionCount = + ValidFunctions[EndIndex - 1]->getKnownExecutionCount(); + BC.outs() << format("----------------\n| Bucket %zu: " + "|\n----------------\n", + BucketIndex + 1) + << format( + "execution counts of the %zu functions in the bucket: " + "%zu-%zu\n", + EndIndex - StartIndex, MinFunctionExecutionCount, + MaxFunctionExecutionCount); + printCFGContinuityStats(BC.outs(), Functions); + printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + } + } +} +} // namespace + +bool PrintProfileQualityStats::shouldOptimize(const BinaryFunction &BF) const { + if (BF.empty() || !BF.hasValidProfile()) + return false; + + return BinaryFunctionPass::shouldOptimize(BF); +} + +Error PrintProfileQualityStats::runOnFunctions(BinaryContext &BC) { + // Create a list of functions with valid profiles. 
+ FunctionListType ValidFunctions; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + if (PrintProfileQualityStats::shouldOptimize(*Function)) + ValidFunctions.push_back(Function); + } + if (ValidFunctions.empty() || opts::TopFunctionsForProfileQualityCheck == 0) + return Error::success(); + + printAll(BC, ValidFunctions, opts::TopFunctionsForProfileQualityCheck); + return Error::success(); +} diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 2d851c751ae10..dd48653931eb9 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -12,7 +12,6 @@ #include "bolt/Passes/AllocCombiner.h" #include "bolt/Passes/AsmDump.h" #include "bolt/Passes/CMOVConversion.h" -#include "bolt/Passes/ContinuityStats.h" #include "bolt/Passes/FixRISCVCallsPass.h" #include "bolt/Passes/FixRelaxationPass.h" #include "bolt/Passes/FrameOptimizer.h" @@ -27,6 +26,7 @@ #include "bolt/Passes/MCF.h" #include "bolt/Passes/PLTCall.h" #include "bolt/Passes/PatchEntries.h" +#include "bolt/Passes/ProfileQualityStats.h" #include "bolt/Passes/RegReAssign.h" #include "bolt/Passes/ReorderData.h" #include "bolt/Passes/ReorderFunctions.h" @@ -379,7 +379,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { if (opts::PrintProfileStats) Manager.registerPass(std::make_unique(NeverPrint)); - Manager.registerPass(std::make_unique(NeverPrint)); + Manager.registerPass(std::make_unique(NeverPrint)); Manager.registerPass(std::make_unique(NeverPrint)); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 70a9f084f009b..a97762063eb1e 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -628,6 +628,11 @@ Error RewriteInstance::discoverStorage() { unsigned Phnum = Obj.getHeader().e_phnum; Phnum += 3; + // Reserve two more pheaders to avoid having writeable and executable + // segment 
in instrumented binary. + if (opts::Instrument) + Phnum += 2; + NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy); NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy); } @@ -2083,6 +2088,13 @@ void RewriteInstance::adjustCommandLineOptions() { opts::HotText = false; } + if (opts::Instrument && opts::UseGnuStack) { + BC->errs() << "BOLT-ERROR: cannot avoid having writeable and executable " + "segment in instrumented binary if program headers will be " + "updated in place\n"; + exit(1); + } + if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) { opts::HotTextMoveSections.addValue(".stub"); opts::HotTextMoveSections.addValue(".mover"); @@ -3612,11 +3624,13 @@ void RewriteInstance::emitAndLink() { static_cast(*Streamer).getAssembler()); } - if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) + if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) { + StartLinkingRuntimeLib = true; RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) { // Map newly registered sections. this->mapAllocatableSections(MapSection); }); + } // Once the code is emitted, we can rename function sections to actual // output sections and de-register sections used for emission. 
@@ -4011,12 +4025,17 @@ void RewriteInstance::mapAllocatableSections( Section.setOutputFileOffset(Section.getInputFileOffset()); MapSection(Section, Section.getAddress()); } else { - NextAvailableAddress = - alignTo(NextAvailableAddress, Section.getAlignment()); + uint64_t Alignment = Section.getAlignment(); + if (opts::Instrument && StartLinkingRuntimeLib) { + Alignment = BC->RegularPageSize; + StartLinkingRuntimeLib = false; + } + NextAvailableAddress = alignTo(NextAvailableAddress, Alignment); + LLVM_DEBUG({ - dbgs() << "BOLT: mapping section " << Section.getName() << " (0x" - << Twine::utohexstr(Section.getAllocAddress()) << ") to 0x" - << Twine::utohexstr(NextAvailableAddress) << ":0x" + dbgs() << "BOLT-DEBUG: mapping section " << Section.getName() + << " (0x" << Twine::utohexstr(Section.getAllocAddress()) + << ") to 0x" << Twine::utohexstr(NextAvailableAddress) << ":0x" << Twine::utohexstr(NextAvailableAddress + Section.getOutputSize()) << '\n'; @@ -4079,6 +4098,9 @@ void RewriteInstance::patchELFPHDRTable() { } } + if (opts::Instrument) + Phnum += 2; + // NOTE Currently .eh_frame_hdr appends to the last segment, recalculate // last segments size based on the NextAvailableAddress variable. if (!NewWritableSegmentSize) { @@ -4093,7 +4115,8 @@ void RewriteInstance::patchELFPHDRTable() { const uint64_t SavedPos = OS.tell(); OS.seek(PHDRTableOffset); - auto createNewTextPhdr = [&]() { + auto createNewPhdrs = [&]() { + SmallVector NewPhdrs; ELF64LEPhdrTy NewPhdr; NewPhdr.p_type = ELF::PT_LOAD; if (PHDRTableAddress) { @@ -4108,20 +4131,67 @@ void RewriteInstance::patchELFPHDRTable() { NewPhdr.p_filesz = NewTextSegmentSize; NewPhdr.p_memsz = NewTextSegmentSize; NewPhdr.p_flags = ELF::PF_X | ELF::PF_R; - if (opts::Instrument) { - // FIXME: Currently instrumentation is experimental and the runtime data - // is emitted with code, thus everything needs to be writable. 
- NewPhdr.p_flags |= ELF::PF_W; - } NewPhdr.p_align = BC->PageAlign; - return NewPhdr; + if (!opts::Instrument) { + NewPhdrs.push_back(NewPhdr); + } else { + ErrorOr Sec = + BC->getUniqueSectionByName(".bolt.instr.counters"); + assert(Sec && "expected one and only one `.bolt.instr.counters` section"); + const uint64_t Addr = Sec->getOutputAddress(); + const uint64_t Offset = Sec->getOutputFileOffset(); + const uint64_t Size = Sec->getOutputSize(); + assert(Addr > NewPhdr.p_vaddr && + Addr + Size < NewPhdr.p_vaddr + NewPhdr.p_memsz && + "`.bolt.instr.counters` section is expected to be included in the " + "new text sgement"); + + // Set correct size for the previous header since we are breaking the + // new text segment into three segments. + uint64_t Delta = Addr - NewPhdr.p_vaddr; + NewPhdr.p_filesz = Delta; + NewPhdr.p_memsz = Delta; + NewPhdrs.push_back(NewPhdr); + + // Create a program header for a RW segment that includes the + // `.bolt.instr.counters` section only. + ELF64LEPhdrTy NewPhdrRWSegment; + NewPhdrRWSegment.p_type = ELF::PT_LOAD; + NewPhdrRWSegment.p_offset = Offset; + NewPhdrRWSegment.p_vaddr = Addr; + NewPhdrRWSegment.p_paddr = Addr; + NewPhdrRWSegment.p_filesz = Size; + NewPhdrRWSegment.p_memsz = Size; + NewPhdrRWSegment.p_flags = ELF::PF_R | ELF::PF_W; + NewPhdrRWSegment.p_align = BC->RegularPageSize; + NewPhdrs.push_back(NewPhdrRWSegment); + + // Create a program header for a RX segment that includes all the RX + // sections from runtime library. 
+ ELF64LEPhdrTy NewPhdrRXSegment; + NewPhdrRXSegment.p_type = ELF::PT_LOAD; + const uint64_t AddrRX = alignTo(Addr + Size, BC->RegularPageSize); + const uint64_t OffsetRX = alignTo(Offset + Size, BC->RegularPageSize); + const uint64_t SizeRX = NewTextSegmentSize - (AddrRX - NewPhdr.p_paddr); + NewPhdrRXSegment.p_offset = OffsetRX; + NewPhdrRXSegment.p_vaddr = AddrRX; + NewPhdrRXSegment.p_paddr = AddrRX; + NewPhdrRXSegment.p_filesz = SizeRX; + NewPhdrRXSegment.p_memsz = SizeRX; + NewPhdrRXSegment.p_flags = ELF::PF_X | ELF::PF_R; + NewPhdrRXSegment.p_align = BC->RegularPageSize; + NewPhdrs.push_back(NewPhdrRXSegment); + } + + return NewPhdrs; }; auto writeNewSegmentPhdrs = [&]() { if (PHDRTableAddress || NewTextSegmentSize) { - ELF64LE::Phdr NewPhdr = createNewTextPhdr(); - OS.write(reinterpret_cast(&NewPhdr), sizeof(NewPhdr)); + SmallVector NewPhdrs = createNewPhdrs(); + OS.write(reinterpret_cast(NewPhdrs.data()), + sizeof(ELF64LE::Phdr) * NewPhdrs.size()); } if (NewWritableSegmentSize) { @@ -4169,8 +4239,12 @@ void RewriteInstance::patchELFPHDRTable() { } case ELF::PT_GNU_STACK: if (opts::UseGnuStack) { - // Overwrite the header with the new text segment header. - NewPhdr = createNewTextPhdr(); + // Overwrite the header with the new segment header. 
+ assert(!opts::Instrument); + SmallVector NewPhdrs = createNewPhdrs(); + assert(NewPhdrs.size() == 1 && + "expect exactly one program header was created"); + NewPhdr = NewPhdrs[0]; ModdedGnuStack = true; } break; diff --git a/bolt/test/X86/cfg-discontinuity-reporting.test b/bolt/test/X86/cfg-discontinuity-reporting.test deleted file mode 100644 index 4d7d3305cdb75..0000000000000 --- a/bolt/test/X86/cfg-discontinuity-reporting.test +++ /dev/null @@ -1,4 +0,0 @@ -## Check profile discontinuity reporting -RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe -RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s -CHECK: among the hottest 5 functions top 5% function CFG discontinuity is 100.00% diff --git a/bolt/test/X86/profile-quality-reporting.test b/bolt/test/X86/profile-quality-reporting.test new file mode 100644 index 0000000000000..2e15a6b245afa --- /dev/null +++ b/bolt/test/X86/profile-quality-reporting.test @@ -0,0 +1,4 @@ +## Check profile quality stats reporting +RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe +RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s +CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst) diff --git a/bolt/test/avoid-wx-segment.c b/bolt/test/avoid-wx-segment.c new file mode 100644 index 0000000000000..fcc3eb6e4c640 --- /dev/null +++ b/bolt/test/avoid-wx-segment.c @@ -0,0 +1,15 @@ +// Test bolt instrumentation won't generate a binary with any segment that +// is writable and executable. Basically we want to put `.bolt.instr.counters` +// section into its own segment, separated from its surrounding RX sections. 
+ +// REQUIRES: system-linux + +void foo() {} +void bar() { foo(); } + +// RUN: %clang %cflags -c %s -o %t.o +// RUN: ld.lld -q -o %t.so %t.o -shared --init=foo --fini=foo +// RUN: llvm-bolt --instrument %t.so -o %tt.so +// RUN: llvm-readelf -l %tt.so | FileCheck %s +// CHECK-NOT: RWE +// CHECK: {{[0-9]*}} .bolt.instr.counters {{$}} diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index a8d17d19fda1d..07a79d6bbe807 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -112,7 +112,8 @@ Changes in existing checks ` fixing false positives from smart pointer accessors repeated in checking ``has_value`` and accessing ``value``. The option `IgnoreSmartPointerDereference` should - no longer be needed and will be removed. + no longer be needed and will be removed. Also fixing false positive from + const reference accessors to objects containing optional member. - Improved :doc:`bugprone-unsafe-functions ` check to allow specifying diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index d157c07c9cef8..9ecac68ae72bf 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -2582,9 +2582,9 @@ the configuration (without a prefix: ``Auto``). .. _BracedInitializerIndentWidth: -**BracedInitializerIndentWidth** (``Unsigned``) :versionbadge:`clang-format 17` :ref:`¶ ` +**BracedInitializerIndentWidth** (``Integer``) :versionbadge:`clang-format 17` :ref:`¶ ` The number of columns to use to indent the contents of braced init lists. - If unset, ``ContinuationIndentWidth`` is used. + If unset or negative, ``ContinuationIndentWidth`` is used. .. 
code-block:: c++ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2b72143482943..7873c2048e53c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -225,6 +225,8 @@ Bug Fixes in This Version - Clang now outputs correct values when #embed data contains bytes with negative signed char values (#GH102798). +- Fixed rejects-valid problem when #embed appears in std::initializer_list or + when it can affect template argument deduction (#GH122306). Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 0f98d237dcbcd..cfe49acf20b77 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -5189,6 +5189,16 @@ class InitListExpr : public Expr { unsigned getNumInits() const { return InitExprs.size(); } + /// getNumInits but if the list has an EmbedExpr inside includes full length + /// of embedded data. + unsigned getNumInitsWithEmbedExpanded() const { + unsigned Sum = InitExprs.size(); + for (auto *IE : InitExprs) + if (auto *EE = dyn_cast(IE)) + Sum += EE->getDataElementCount() - 1; + return Sum; + } + /// Retrieve the set of initializers. 
Expr **getInits() { return reinterpret_cast(InitExprs.data()); } diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 39e295aced96b..6d00862dde5ed 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -544,29 +544,29 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12, "V4fiiV4f", TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64") TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32, "V8fV8hV16hV8fs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32, "V8fV8sV16sV8fs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32, "V8hV8hV16hV8hs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32, "V8sV8sV16sV8ss", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32, "V8iIbV2iIbV4iV8isIb", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32, "V8iIbiIbV2iV8isIb", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32, "V8iIbV2iIbV4iV8isIb", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32") - 
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64, "V4fV4hV8hV4fs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64, "V4fV4sV8sV4fs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64, "V4hV4hV8hV4hs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64, "V4sV4sV8sV4ss", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64, "V4iIbiIbV2iV4isIb", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64, "V4iIbiIbiV4isIb", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64, "V4iIbiIbV2iV4isIb", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32, "V8fV8hV16hV8fi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32, "V8fV8sV16sV8fi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32, "V8hV8hV16hV8hi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32, "V8sV8sV16sV8si", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32, "V8iIbV2iIbV4iV8iiIb", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32, "V8iIbiIbV2iV8iiIb", "nc", 
"gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32, "V8iIbV2iIbV4iV8iiIb", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32") + +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64, "V4fV4hV8hV4fi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64, "V4fV4sV8sV4fi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64, "V4hV4hV8hV4hi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64, "V4sV4sV8sV4si", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64, "V4iIbiIbV2iV4iiIb", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64, "V4iIbiIbiV4iiIb", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64, "V4iIbiIbV2iV4iiIb", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64") TARGET_BUILTIN(__builtin_amdgcn_prng_b32, "UiUi", "nc", 
"prng-inst") TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_fp6_f16, "V6UiV32hf", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts") diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 8f532a63f9e04..058fecd4e91ef 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -563,6 +563,11 @@ def err_test_module_file_extension_format : Error< def err_drv_module_output_with_multiple_arch : Error< "option '-fmodule-output' cannot be used with multiple arch options">; +def err_drv_reduced_module_output_overrided : Warning< + "the implicit output of reduced BMI may be overrided by the output file specified by '--precompile'. " + "please consider use '-fmodule-output=' to specify the output file for reduced BMI explicitly">, + InGroup>; + def warn_drv_delayed_template_parsing_after_cxx20 : Warning< "-fdelayed-template-parsing is deprecated after C++20">, InGroup>; diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3afbba51bd138..b51106fa56759 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -19,7 +19,7 @@ include "arm_sve_sme_incl.td" // Loads // Load one vector (scalar base) -def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; @@ -33,7 +33,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = 
"sme,bf16" in { } // Load one vector (scalar base, VL displacement) -def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; @@ -247,10 +247,10 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = InvalidMode in { } // Load one vector, unextended load, non-temporal (scalar base) -def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; // Load one vector, unextended load, non-temporal (scalar base, VL displacement) -def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def SVLDNT1_BF : MInst<"svldnt1[_{2}]", "dPc", "b", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; @@ -265,7 +265,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { } multiclass StructLoad { - def : SInst; + def : SInst; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def: SInst; } @@ -314,11 +314,11 @@ let SVETargetGuard = "sve2p1", 
SMETargetGuard = InvalidMode in { def SVLD1UDQ_VNUM : MInst<"svld1udq_vnum[_{d}]", "dPcl", "lUld", [IsLoad], MemEltTyInt64, "aarch64_sve_ld1udq">; // Load one vector (vector base + scalar offset) - def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; - def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; + def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; + def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; // Load one vector (scalar base + vector offset) - def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">; + def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">; // Load N-element structure into N vectors (scalar base) defm SVLD2Q : StructLoad<"svld2q[_{2}]", "2Pc", "aarch64_sve_ld2q_sret">; @@ -341,7 +341,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { // Stores // Store one vector (scalar base) -def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore, VerifyRuntimeMode], 
MemEltTyInt8, "aarch64_sve_st1">; def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; @@ -350,7 +350,7 @@ def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore, Verify def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; // Store one vector (scalar base, VL displacement) -def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; @@ -435,7 +435,7 @@ def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "v } // let SVETargetGuard = "sve" multiclass StructStore { - def : SInst; + def : SInst; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def: SInst; } @@ -451,10 +451,10 @@ defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">; defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">; // Store one vector, with no truncation, non-temporal (scalar base) -def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; // Store one vector, with no 
truncation, non-temporal (scalar base, VL displacement) -def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; @@ -470,12 +470,12 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { def SVST1UDQ_VNUM : MInst<"svst1dq_vnum[_{d}]", "vPpld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">; // Store one vector (vector base + scalar offset) - def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; - def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; + def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; + def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; // Store one vector (scalar base + vector offset) - def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; - def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, 
"aarch64_sve_st1q_scatter_vector_offset">; + def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; + def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; // Store N vectors into N-element structure (scalar base) defm SVST2Q : StructStore<"svst2q[_{d}]", "vPc2", "aarch64_sve_st2q">; @@ -2042,20 +2042,20 @@ def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNo } multiclass MultiVecLoad { - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif", 
[IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; @@ -2067,20 +2067,20 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = 
"sme2" in { } multiclass MultiVecStore { - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", 
"v}pl2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index f03241a875845..14afdfc2758ea 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -9,6 +9,7 @@ #ifndef LLVM_CLANG_CIR_DIALECT_BUILDER_CIRBASEBUILDER_H #define LLVM_CLANG_CIR_DIALECT_BUILDER_CIRBASEBUILDER_H +#include "clang/AST/CharUnits.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" @@ -51,6 +52,47 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { return cir::ConstPtrAttr::get( 
getContext(), mlir::cast(type), valueAttr); } + + mlir::Value createAlloca(mlir::Location loc, cir::PointerType addrType, + mlir::Type type, llvm::StringRef name, + mlir::IntegerAttr alignment) { + return create(loc, addrType, type, name, alignment); + } + + cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr, + bool isVolatile = false, uint64_t alignment = 0) { + mlir::IntegerAttr intAttr; + if (alignment) + intAttr = mlir::IntegerAttr::get( + mlir::IntegerType::get(ptr.getContext(), 64), alignment); + + return create(loc, ptr); + } + + // + // Block handling helpers + // ---------------------- + // + static OpBuilder::InsertPoint getBestAllocaInsertPoint(mlir::Block *block) { + auto last = + std::find_if(block->rbegin(), block->rend(), [](mlir::Operation &op) { + // TODO: Add LabelOp missing feature here + return mlir::isa(&op); + }); + + if (last != block->rend()) + return OpBuilder::InsertPoint(block, ++mlir::Block::iterator(&*last)); + return OpBuilder::InsertPoint(block, block->begin()); + }; + + mlir::IntegerAttr getSizeFromCharUnits(mlir::MLIRContext *ctx, + clang::CharUnits size) { + // Note that mlir::IntegerType is used instead of cir::IntType here + // because we don't need sign information for this to be useful, so keep + // it simple. 
+ return mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 64), + size.getQuantity()); + } }; } // namespace cir diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td index 097616ba06749..ece04c225e322 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td @@ -54,6 +54,21 @@ def CIR_BoolAttr : CIR_Attr<"Bool", "bool", [TypedAttrInterface]> { }]; } +//===----------------------------------------------------------------------===// +// UndefAttr +//===----------------------------------------------------------------------===// + +def UndefAttr : CIR_Attr<"Undef", "undef", [TypedAttrInterface]> { + let summary = "Represent an undef constant"; + let description = [{ + The UndefAttr represents an undef constant, corresponding to LLVM's notion + of undef. + }]; + + let parameters = (ins AttributeSelfTypeParameter<"">:$type); + let assemblyFormat = [{}]; +} + //===----------------------------------------------------------------------===// // IntegerAttr //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index f9ce38588e436..083cf46a93ae6 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -115,6 +115,119 @@ def ConstantOp : CIR_Op<"const", let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// AllocaOp +//===----------------------------------------------------------------------===// + +class AllocaTypesMatchWith + : PredOpTrait> { + string lhs = lhsArg; + string rhs = rhsArg; + string transformer = transform; +} + +def AllocaOp : CIR_Op<"alloca", [ + AllocaTypesMatchWith<"'allocaType' matches pointee type of 'addr'", + "addr", "allocaType", + "cast($_self).getPointee()">, + DeclareOpInterfaceMethods]> { + let summary = 
"Defines a scope-local variable"; + let description = [{ + The `cir.alloca` operation defines a scope-local variable. + + The presence of the `const` attribute indicates that the local variable is + declared with C/C++ `const` keyword. + + The result type is a pointer to the input's type. + + Example: + + ```mlir + // int count; + %0 = cir.alloca i32, !cir.ptr, ["count"] {alignment = 4 : i64} + + // int *ptr; + %1 = cir.alloca !cir.ptr, !cir.ptr>, ["ptr"] {alignment = 8 : i64} + ... + ``` + }]; + + let arguments = (ins + TypeAttr:$allocaType, + StrAttr:$name, + UnitAttr:$init, + UnitAttr:$constant, + ConfinedAttr, [IntMinValue<0>]>:$alignment, + OptionalAttr:$annotations + ); + + let results = (outs Res]>:$addr); + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins "mlir::Type":$addr, + "mlir::Type":$allocaType, + "llvm::StringRef":$name, + "mlir::IntegerAttr":$alignment)> + ]; + + let extraClassDeclaration = [{ + // Whether the alloca input type is a pointer. + bool isPointerType() { return ::mlir::isa<::cir::PointerType>(getAllocaType()); } + }]; + + let assemblyFormat = [{ + $allocaType `,` qualified(type($addr)) `,` + `[` $name + (`,` `init` $init^)? + (`,` `const` $constant^)? + `]` + ($annotations^)? attr-dict + }]; + + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// LoadOp +//===----------------------------------------------------------------------===// + +def LoadOp : CIR_Op<"load", [ + TypesMatchWith<"type of 'result' matches pointee type of 'addr'", + "addr", "result", + "cast($_self).getPointee()">, + DeclareOpInterfaceMethods]> { + + let summary = "Load value from memory adddress"; + let description = [{ + `cir.load` reads a value (lvalue to rvalue conversion) given an address + backed up by a `cir.ptr` type. + + Example: + + ```mlir + + // Read from local variable, address in %0. 
+ %1 = cir.load %0 : !cir.ptr, i32 + ``` + }]; + + let arguments = (ins Arg:$addr); + let results = (outs CIR_AnyType:$result); + + let assemblyFormat = [{ + $addr `:` qualified(type($addr)) `,` type($result) attr-dict + }]; + + // FIXME: add verifier. +} + //===----------------------------------------------------------------------===// // ReturnOp //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index d4fcd52e7e6e3..5c7e10d018809 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -30,12 +30,35 @@ struct MissingFeatures { // This isn't needed until we add support for bools. static bool convertTypeForMemory() { return false; } + // CIRGenFunction implementation details + static bool cgfSymbolTable() { return false; } + // Unhandled global/linkage information. static bool opGlobalDSOLocal() { return false; } static bool opGlobalThreadLocal() { return false; } static bool opGlobalConstant() { return false; } static bool opGlobalAlignment() { return false; } static bool opGlobalLinkage() { return false; } + + // Load attributes + static bool opLoadThreadLocal() { return false; } + static bool opLoadEmitScalarRangeCheck() { return false; } + static bool opLoadBooleanRepresentation() { return false; } + + // AllocaOp handling + static bool opAllocaVarDeclContext() { return false; } + static bool opAllocaStaticLocal() { return false; } + static bool opAllocaNonGC() { return false; } + static bool opAllocaImpreciseLifetime() { return false; } + static bool opAllocaPreciseLifetime() { return false; } + static bool opAllocaTLS() { return false; } + static bool opAllocaOpenMPThreadPrivate() { return false; } + static bool opAllocaEscapeByReference() { return false; } + static bool opAllocaReference() { return false; } + + // Misc + static bool scalarConversionOpts() { return false; } + static 
bool tryEmitAsConstant() { return false; } }; } // namespace cir diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 883d6a969c258..75b1c51445942 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3176,7 +3176,7 @@ def modules_reduced_bmi : Flag<["-"], "fmodules-reduced-bmi">, HelpText<"Generate the reduced BMI">, MarshallingInfoFlag>; -def experimental_modules_reduced_bmi : Flag<["-"], "fexperimental-modules-reduced-bmi">, +def experimental_modules_reduced_bmi : Flag<["-"], "fexperimental-modules-reduced-bmi">, Group, Visibility<[ClangOption, CC1Option]>, Alias; def fmodules_embed_all_files : Joined<["-"], "fmodules-embed-all-files">, @@ -7431,7 +7431,7 @@ def fuse_register_sized_bitfield_access: Flag<["-"], "fuse-register-sized-bitfie def relaxed_aliasing : Flag<["-"], "relaxed-aliasing">, HelpText<"Turn off Type Based Alias Analysis">, MarshallingInfoFlag>; -defm pointer_tbaa: BoolOption<"", "pointer-tbaa", CodeGenOpts<"PointerTBAA">, +defm pointer_tbaa: BoolOption<"", "pointer-tbaa", CodeGenOpts<"PointerTBAA">, DefaultTrue, PosFlag, NegFlag, diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 46fb1d52701b3..fec47a248abb4 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -1289,7 +1289,7 @@ struct FormatStyle { BitFieldColonSpacingStyle BitFieldColonSpacing; /// The number of columns to use to indent the contents of braced init lists. - /// If unset, ``ContinuationIndentWidth`` is used. + /// If unset or negative, ``ContinuationIndentWidth`` is used. /// \code /// AlignAfterOpenBracket: AlwaysBreak /// BracedInitializerIndentWidth: 2 @@ -1319,7 +1319,7 @@ struct FormatStyle { /// } /// \endcode /// \version 17 - std::optional BracedInitializerIndentWidth; + int BracedInitializerIndentWidth; /// Different ways to wrap braces after control statements. 
enum BraceWrappingAfterControlStatementStyle : int8_t { diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ebdbc69384efb..3b2be86a88e82 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4392,11 +4392,11 @@ class Sema final : public SemaBase { // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check. bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee); -private: /// Function or variable declarations to be checked for whether the deferred /// diagnostics should be emitted. llvm::SmallSetVector DeclsToCheckForDeferredDiags; +private: /// Map of current shadowing declarations to shadowed declarations. Warn if /// it looks like the user is trying to modify the shadowing declaration. llvm::DenseMap ShadowingDecls; diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index e1394e28cd49a..9381c5c42e566 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -580,6 +580,22 @@ void handleConstMemberCall(const CallExpr *CE, return; } + // Cache if the const method returns a reference + if (RecordLoc != nullptr && CE->isGLValue()) { + const FunctionDecl *DirectCallee = CE->getDirectCallee(); + if (DirectCallee == nullptr) + return; + + StorageLocation &Loc = + State.Lattice.getOrCreateConstMethodReturnStorageLocation( + *RecordLoc, DirectCallee, State.Env, [&](StorageLocation &Loc) { + // no-op + }); + + State.Env.setStorageLocation(*CE, Loc); + return; + } + // Cache if the const method returns a boolean or pointer type. // We may decide to cache other return types in the future. 
if (RecordLoc != nullptr && diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index ff4f940a596e3..12e99143cb148 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -2364,12 +2364,13 @@ template static std::optional getEndCharLoc(const NodeTy *Node, const SourceManager &SM, const LangOptions &LangOpts) { - unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts); - SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); - - if (Loc.isValid()) - return Loc; + if (unsigned TkLen = + Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts)) { + SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); + if (Loc.isValid()) + return Loc; + } return std::nullopt; } diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h new file mode 100644 index 0000000000000..72e7e1dcf1560 --- /dev/null +++ b/clang/lib/CIR/CodeGen/Address.h @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class provides a simple wrapper for a pair of a pointer and an +// alignment. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_LIB_CIR_ADDRESS_H +#define CLANG_LIB_CIR_ADDRESS_H + +#include "mlir/IR/Value.h" +#include "clang/AST/CharUnits.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/ADT/PointerIntPair.h" + +namespace clang::CIRGen { + +class Address { + + // The boolean flag indicates whether the pointer is known to be non-null. 
+ llvm::PointerIntPair pointerAndKnownNonNull; + + /// The expected CIR type of the pointer. Carrying accurate element type + /// information in Address makes it more convenient to work with Address + /// values and allows frontend assertions to catch simple mistakes. + mlir::Type elementType; + + clang::CharUnits alignment; + +protected: + Address(std::nullptr_t) : elementType(nullptr) {} + +public: + Address(mlir::Value pointer, mlir::Type elementType, + clang::CharUnits alignment) + : pointerAndKnownNonNull(pointer, false), elementType(elementType), + alignment(alignment) { + assert(mlir::isa(pointer.getType()) && + "Expected cir.ptr type"); + + assert(pointer && "Pointer cannot be null"); + assert(elementType && "Element type cannot be null"); + assert(!alignment.isZero() && "Alignment cannot be zero"); + + assert(mlir::cast(pointer.getType()).getPointee() == + elementType); + } + + static Address invalid() { return Address(nullptr); } + bool isValid() const { + return pointerAndKnownNonNull.getPointer() != nullptr; + } + + mlir::Value getPointer() const { + assert(isValid()); + return pointerAndKnownNonNull.getPointer(); + } + + mlir::Type getElementType() const { + assert(isValid()); + assert(mlir::cast( + pointerAndKnownNonNull.getPointer().getType()) + .getPointee() == elementType); + return elementType; + } +}; + +} // namespace clang::CIRGen + +#endif // CLANG_LIB_CIR_ADDRESS_H diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp new file mode 100644 index 0000000000000..e44cad559d509 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -0,0 +1,113 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Decl nodes as CIR code. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/CIR/MissingFeatures.h" + +using namespace clang; +using namespace clang::CIRGen; + +void CIRGenFunction::emitAutoVarAlloca(const VarDecl &d) { + QualType ty = d.getType(); + if (ty.getAddressSpace() != LangAS::Default) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarAlloca: address space"); + + auto loc = getLoc(d.getSourceRange()); + + if (d.isEscapingByref()) + cgm.errorNYI(d.getSourceRange(), + "emitAutoVarDecl: decl escaping by reference"); + + CharUnits alignment = getContext().getDeclAlign(&d); + + // If the type is variably-modified, emit all the VLA sizes for it. + if (ty->isVariablyModifiedType()) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarDecl: variably modified type"); + + Address address = Address::invalid(); + if (!ty->isConstantSizeType()) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarDecl: non-constant size type"); + + // A normal fixed sized variable becomes an alloca in the entry block, + mlir::Type allocaTy = convertTypeForMem(ty); + // Create the temp alloca and declare variable using it. + address = createTempAlloca(allocaTy, alignment, loc, d.getName()); + declare(address, &d, ty, getLoc(d.getSourceRange()), alignment); + + setAddrOfLocalVar(&d, address); +} + +void CIRGenFunction::emitAutoVarInit(const clang::VarDecl &d) { + QualType type = d.getType(); + + // If this local has an initializer, emit it now. 
+ const Expr *init = d.getInit(); + + if (init || !type.isPODType(getContext())) { + cgm.errorNYI(d.getSourceRange(), "emitAutoVarInit"); + } +} + +void CIRGenFunction::emitAutoVarCleanups(const clang::VarDecl &d) { + // Check the type for a cleanup. + if (QualType::DestructionKind dtorKind = d.needsDestruction(getContext())) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarCleanups: type cleanup"); + + assert(!cir::MissingFeatures::opAllocaPreciseLifetime()); + + // Handle the cleanup attribute. + if (d.hasAttr()) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarCleanups: CleanupAttr"); +} + +/// Emit code and set up symbol table for a variable declaration with auto, +/// register, or no storage class specifier. These turn into simple stack +/// objects, globals depending on target. +void CIRGenFunction::emitAutoVarDecl(const VarDecl &d) { + emitAutoVarAlloca(d); + emitAutoVarInit(d); + emitAutoVarCleanups(d); +} + +void CIRGenFunction::emitVarDecl(const VarDecl &d) { + // If the declaration has external storage, don't emit it now, allow it to be + // emitted lazily on its first use. 
+ if (d.hasExternalStorage()) + return; + + if (d.getStorageDuration() != SD_Automatic) + cgm.errorNYI(d.getSourceRange(), "emitVarDecl automatic storage duration"); + if (d.getType().getAddressSpace() == LangAS::opencl_local) + cgm.errorNYI(d.getSourceRange(), "emitVarDecl openCL address space"); + + assert(d.hasLocalStorage()); + + assert(!cir::MissingFeatures::opAllocaVarDeclContext()); + return emitAutoVarDecl(d); +} + +void CIRGenFunction::emitDecl(const Decl &d) { + switch (d.getKind()) { + case Decl::Var: { + const VarDecl &vd = cast(d); + assert(vd.isLocalVarDecl() && + "Should not see file-scope variables inside a function!"); + emitVarDecl(vd); + return; + } + default: + cgm.errorNYI(d.getSourceRange(), "emitDecl: unhandled decl type"); + } +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp new file mode 100644 index 0000000000000..ccc3e20875263 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -0,0 +1,130 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Expr nodes as CIR code. 
+// +//===----------------------------------------------------------------------===// + +#include "Address.h" +#include "CIRGenFunction.h" +#include "CIRGenValue.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "clang/AST/Attr.h" +#include "clang/AST/CharUnits.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/MissingFeatures.h" + +using namespace clang; +using namespace clang::CIRGen; +using namespace cir; + +mlir::Value CIRGenFunction::emitLoadOfScalar(LValue lvalue, + SourceLocation loc) { + assert(!cir::MissingFeatures::opLoadThreadLocal()); + assert(!cir::MissingFeatures::opLoadEmitScalarRangeCheck()); + assert(!cir::MissingFeatures::opLoadBooleanRepresentation()); + + Address addr = lvalue.getAddress(); + mlir::Type eltTy = addr.getElementType(); + + mlir::Value ptr = addr.getPointer(); + if (mlir::isa(eltTy)) + cgm.errorNYI(loc, "emitLoadOfScalar: void type"); + + mlir::Value loadOp = builder.CIRBaseBuilderTy::createLoad( + getLoc(loc), ptr, false /*isVolatile*/); + + return loadOp; +} + +/// Given an expression that represents a value lvalue, this +/// method emits the address of the lvalue, then loads the result as an rvalue, +/// returning the rvalue. 
+RValue CIRGenFunction::emitLoadOfLValue(LValue lv, SourceLocation loc) { + assert(!lv.getType()->isFunctionType()); + assert(!(lv.getType()->isConstantMatrixType()) && "not implemented"); + + if (lv.isSimple()) + return RValue::get(emitLoadOfScalar(lv, loc)); + + cgm.errorNYI(loc, "emitLoadOfLValue"); + return RValue::get(nullptr); +} + +LValue CIRGenFunction::emitDeclRefLValue(const DeclRefExpr *e) { + const NamedDecl *nd = e->getDecl(); + QualType ty = e->getType(); + + assert(e->isNonOdrUse() != NOUR_Unevaluated && + "should not emit an unevaluated operand"); + + if (const auto *vd = dyn_cast(nd)) { + // Checks for omitted feature handling + assert(!cir::MissingFeatures::opAllocaStaticLocal()); + assert(!cir::MissingFeatures::opAllocaNonGC()); + assert(!cir::MissingFeatures::opAllocaImpreciseLifetime()); + assert(!cir::MissingFeatures::opAllocaTLS()); + assert(!cir::MissingFeatures::opAllocaOpenMPThreadPrivate()); + assert(!cir::MissingFeatures::opAllocaEscapeByReference()); + + // Check if this is a global variable + if (vd->hasLinkage() || vd->isStaticDataMember()) + cgm.errorNYI(vd->getSourceRange(), "emitDeclRefLValue: global variable"); + + Address addr = Address::invalid(); + + // The variable should generally be present in the local decl map. + auto iter = LocalDeclMap.find(vd); + if (iter != LocalDeclMap.end()) { + addr = iter->second; + } else { + // Otherwise, it might be static local we haven't emitted yet for some + // reason; most likely, because it's in an outer function. 
+ cgm.errorNYI(vd->getSourceRange(), "emitDeclRefLValue: static local"); + } + + return LValue::makeAddr(addr, ty); + } + + cgm.errorNYI(e->getSourceRange(), "emitDeclRefLValue: unhandled decl type"); + return LValue(); +} + +mlir::Value CIRGenFunction::emitAlloca(StringRef name, mlir::Type ty, + mlir::Location loc, + CharUnits alignment) { + mlir::Block *entryBlock = getCurFunctionEntryBlock(); + + // CIR uses its own alloca address space rather than follow the target data + // layout like original CodeGen. The data layout awareness should be done in + // the lowering pass instead. + assert(!cir::MissingFeatures::addressSpace()); + cir::PointerType localVarPtrTy = builder.getPointerTo(ty); + mlir::IntegerAttr alignIntAttr = cgm.getSize(alignment); + + mlir::Value addr; + { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.restoreInsertionPoint(builder.getBestAllocaInsertPoint(entryBlock)); + addr = builder.createAlloca(loc, /*addr type*/ localVarPtrTy, + /*var type*/ ty, name, alignIntAttr); + assert(!cir::MissingFeatures::opAllocaVarDeclContext()); + } + return addr; +} + +/// This creates an alloca and inserts it at the current insertion point of the +/// builder. 
+Address CIRGenFunction::createTempAlloca(mlir::Type ty, CharUnits align, + mlir::Location loc, + const Twine &name) { + mlir::Value alloca = emitAlloca(name.str(), ty, loc, align); + return Address(alloca, ty, align); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 24a959108f73b..90a2fd2a5d806 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -11,9 +11,11 @@ //===----------------------------------------------------------------------===// #include "CIRGenFunction.h" +#include "CIRGenValue.h" #include "clang/AST/Expr.h" #include "clang/AST/StmtVisitor.h" +#include "clang/CIR/MissingFeatures.h" #include "mlir/IR/Value.h" @@ -52,6 +54,19 @@ class ScalarExprEmitter : public StmtVisitor { return {}; } + /// Emits the address of the l-value, then loads and returns the result. + mlir::Value emitLoadOfLValue(const Expr *e) { + LValue lv = cgf.emitLValue(e); + // FIXME: add some akin to EmitLValueAlignmentAssumption(E, V); + return cgf.emitLoadOfLValue(lv, e->getExprLoc()).getScalarVal(); + } + + // l-values + mlir::Value VisitDeclRefExpr(DeclRefExpr *e) { + assert(!cir::MissingFeatures::tryEmitAsConstant()); + return emitLoadOfLValue(e); + } + mlir::Value VisitIntegerLiteral(const IntegerLiteral *e) { mlir::Type type = cgf.convertType(e->getType()); return builder.create( @@ -65,7 +80,27 @@ class ScalarExprEmitter : public StmtVisitor { cgf.getLoc(e->getExprLoc()), type, builder.getCIRBoolAttr(e->getValue())); } + + mlir::Value VisitCastExpr(CastExpr *E); + + /// Emit a conversion from the specified type to the specified destination + /// type, both of which are CIR scalar types. + /// TODO: do we need ScalarConversionOpts here? Should be done in another + /// pass. 
+  mlir::Value emitScalarConversion(mlir::Value src, QualType srcType,
+                                   QualType dstType, SourceLocation loc) {
+    // No sort of type conversion is implemented yet, but the path for implicit
+    // casts goes through here even if the type isn't being changed.
+    srcType = srcType.getCanonicalType();
+    dstType = dstType.getCanonicalType();
+    if (srcType == dstType)
+      return src;
+    cgf.getCIRGenModule().errorNYI(loc,
+                                   "emitScalarConversion for unequal types");
+    return {}; // Null value; never fall off the end after reporting NYI.
+  }
 };
+
 } // namespace
 
 /// Emit the computation of the specified expression of scalar type.
@@ -75,3 +110,31 @@ mlir::Value CIRGenFunction::emitScalarExpr(const Expr *e) {
 
   return ScalarExprEmitter(*this, builder).Visit(const_cast<Expr *>(e));
 }
+
+// Emit code for an explicit or implicit cast. Implicit
+// casts have to handle a more broad range of conversions than explicit
+// casts, as they handle things like function to ptr-to-function decay
+// etc.
+mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *ce) {
+  Expr *e = ce->getSubExpr();
+  QualType destTy = ce->getType();
+  CastKind kind = ce->getCastKind();
+
+  switch (kind) {
+  case CK_LValueToRValue:
+    assert(cgf.getContext().hasSameUnqualifiedType(e->getType(), destTy));
+    assert(e->isGLValue() && "lvalue-to-rvalue applied to r-value!");
+    return Visit(e);
+
+  case CK_IntegralCast: {
+    assert(!cir::MissingFeatures::scalarConversionOpts());
+    return emitScalarConversion(Visit(e), e->getType(), destTy,
+                                ce->getExprLoc());
+  }
+
+  default:
+    cgf.getCIRGenModule().errorNYI(e->getSourceRange(),
+                                   "CastExpr: ", ce->getCastKindName());
+  }
+  return {};
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index bba2f71a87627..86986b5847e98 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -13,6 +13,7 @@
 #include "CIRGenFunction.h"
 
 #include "clang/AST/GlobalDecl.h"
+#include "clang/CIR/MissingFeatures.h"
 
 #include <cassert>
 
@@ -131,6 +132,21 @@ mlir::Location
CIRGenFunction::getLoc(mlir::Location lhs, mlir::Location rhs) {
   return mlir::FusedLoc::get(locs, metadata, &getMLIRContext());
 }
 
+mlir::LogicalResult CIRGenFunction::declare(Address addr, const Decl *var,
+                                            QualType ty, mlir::Location loc,
+                                            CharUnits alignment) {
+  [[maybe_unused]] const auto *namedVar = dyn_cast_or_null<NamedDecl>(var);
+  assert(namedVar && "Needs a named decl");
+  assert(!cir::MissingFeatures::cgfSymbolTable());
+
+  mlir::Value addrVal = addr.getPointer();
+  auto allocaOp = cast<cir::AllocaOp>(addrVal.getDefiningOp());
+  if (ty->isReferenceType() || ty.isConstQualified())
+    allocaOp.setConstantAttr(mlir::UnitAttr::get(&getMLIRContext()));
+
+  return mlir::success();
+}
+
 void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType,
                                    cir::FuncOp fn, cir::FuncType funcType,
                                    SourceLocation loc,
@@ -153,6 +169,7 @@ mlir::LogicalResult CIRGenFunction::emitFunctionBody(const clang::Stmt *body) {
     emitCompoundStmtWithoutScope(*block);
   else
     result = emitStmt(body, /*useCurrentScope=*/true);
+
   return result;
 }
 
@@ -217,4 +234,20 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
   return fn;
 }
 
+/// Emit code to compute a designator that specifies the location
+/// of the expression.
+/// FIXME: document this function better.
+LValue CIRGenFunction::emitLValue(const Expr *e) {
+  // FIXME: ApplyDebugLocation DL(*this, e);
+  switch (e->getStmtClass()) {
+  default:
+    getCIRGenModule().errorNYI(e->getSourceRange(),
+                               std::string("l-value not implemented for '") +
+                                   e->getStmtClassName() + "'");
+    return LValue(); // Same empty l-value emitDeclRefLValue returns on NYI.
+  case Expr::DeclRefExprClass:
+    return emitDeclRefLValue(cast<DeclRefExpr>(e));
+  }
+}
+
 } // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 92fbea16d3aa1..e0888acdc3dce 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -16,8 +16,12 @@
 #include "CIRGenBuilder.h"
 #include "CIRGenModule.h"
 #include "CIRGenTypeCache.h"
+#include "CIRGenValue.h"
+
+#include "Address.h"
 
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/CharUnits.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/Type.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
@@ -49,6 +53,11 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// for.
   mlir::Operation *curFn = nullptr;
 
+  using DeclMapTy = llvm::DenseMap<const clang::Decl *, Address>;
+  /// This keeps track of the CIR allocas or globals for local C
+  /// declarations.
+  DeclMapTy LocalDeclMap;
+
   clang::ASTContext &getContext() const { return cgm.getASTContext(); }
 
   CIRGenBuilderTy &getBuilder() { return builder; }
@@ -56,6 +65,12 @@ class CIRGenFunction : public CIRGenTypeCache {
   CIRGenModule &getCIRGenModule() { return cgm; }
   const CIRGenModule &getCIRGenModule() const { return cgm; }
 
+  mlir::Block *getCurFunctionEntryBlock() {
+    auto fn = mlir::dyn_cast<cir::FuncOp>(curFn);
+    assert(fn && "other callables NYI");
+    return &fn.getRegion().front();
+  }
+
   mlir::Type convertTypeForMem(QualType T);
 
   mlir::Type convertType(clang::QualType T);
@@ -78,6 +93,17 @@ class CIRGenFunction : public CIRGenTypeCache {
 
   mlir::MLIRContext &getMLIRContext() { return cgm.getMLIRContext(); }
 
+private:
+  /// Declare a variable in the current scope, return success if the variable
+  /// wasn't declared yet.
+ mlir::LogicalResult declare(Address addr, const clang::Decl *var, + clang::QualType ty, mlir::Location loc, + clang::CharUnits alignment); + +public: + mlir::Value emitAlloca(llvm::StringRef name, mlir::Type ty, + mlir::Location loc, clang::CharUnits alignment); + /// Use to track source locations across nested visitor traversals. /// Always use a `SourceLocRAIIObject` to change currSrcLoc. std::optional currSrcLoc; @@ -121,8 +147,50 @@ class CIRGenFunction : public CIRGenTypeCache { void emitCompoundStmtWithoutScope(const clang::CompoundStmt &s); + mlir::LogicalResult emitDeclStmt(const clang::DeclStmt &s); + mlir::LogicalResult emitReturnStmt(const clang::ReturnStmt &s); + /// Given an expression that represents a value lvalue, this method emits + /// the address of the lvalue, then loads the result as an rvalue, + /// returning the rvalue. + RValue emitLoadOfLValue(LValue lv, SourceLocation loc); + + /// EmitLoadOfScalar - Load a scalar value from an address, taking + /// care to appropriately convert from the memory representation to + /// the LLVM value representation. The l-value must be a simple + /// l-value. + mlir::Value emitLoadOfScalar(LValue lvalue, SourceLocation loc); + + /// Emit code to compute a designator that specifies the location + /// of the expression. + /// FIXME: document this function better. + LValue emitLValue(const clang::Expr *e); + + void emitDecl(const clang::Decl &d); + + LValue emitDeclRefLValue(const clang::DeclRefExpr *e); + + /// Emit code and set up symbol table for a variable declaration with auto, + /// register, or no storage class specifier. These turn into simple stack + /// objects, globals depending on target. 
+ void emitAutoVarDecl(const clang::VarDecl &d); + + void emitAutoVarAlloca(const clang::VarDecl &d); + void emitAutoVarInit(const clang::VarDecl &d); + void emitAutoVarCleanups(const clang::VarDecl &d); + + /// This method handles emission of any variable declaration + /// inside a function, including static vars etc. + void emitVarDecl(const clang::VarDecl &d); + + /// Set the address of a local variable. + void setAddrOfLocalVar(const clang::VarDecl *vd, Address addr) { + assert(!LocalDeclMap.count(vd) && "Decl already exists in LocalDeclMap!"); + LocalDeclMap.insert({vd, addr}); + // TODO: Add symbol table support + } + /// Emit the computation of the specified expression of scalar type. mlir::Value emitScalarExpr(const clang::Expr *e); cir::FuncOp generateCode(clang::GlobalDecl gd, cir::FuncOp fn, @@ -134,8 +202,10 @@ class CIRGenFunction : public CIRGenTypeCache { void startFunction(clang::GlobalDecl gd, clang::QualType retTy, cir::FuncOp fn, cir::FuncType funcType, clang::SourceLocation loc, clang::SourceLocation startLoc); -}; + Address createTempAlloca(mlir::Type ty, CharUnits align, mlir::Location loc, + const Twine &name = "tmp"); +}; } // namespace clang::CIRGen #endif diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index bf3a4d1130f15..71a37b8c9a2ea 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -17,6 +17,7 @@ #include "CIRGenTypeCache.h" #include "CIRGenTypes.h" +#include "clang/AST/CharUnits.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "mlir/IR/Builders.h" @@ -116,6 +117,10 @@ class CIRGenModule : public CIRGenTypeCache { cir::FuncType funcType, const clang::FunctionDecl *funcDecl); + mlir::IntegerAttr getSize(CharUnits size) { + return builder.getSizeFromCharUnits(&getMLIRContext(), size); + } + const llvm::Triple &getTriple() const { return target.getTriple(); } /// Helpers to emit "not yet implemented" error diagnostics diff --git 
a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index f42f30cc5a433..ed5d87a39704a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -68,6 +68,8 @@ mlir::LogicalResult CIRGenFunction::emitSimpleStmt(const Stmt *s, default: // Only compound and return statements are supported right now. return mlir::failure(); + case Stmt::DeclStmtClass: + return emitDeclStmt(cast(*s)); case Stmt::CompoundStmtClass: if (useCurrentScope) emitCompoundStmtWithoutScope(cast(*s)); @@ -81,6 +83,15 @@ mlir::LogicalResult CIRGenFunction::emitSimpleStmt(const Stmt *s, return mlir::success(); } +mlir::LogicalResult CIRGenFunction::emitDeclStmt(const DeclStmt &s) { + assert(builder.getInsertionBlock() && "expected valid insertion point"); + + for (const Decl *I : s.decls()) + emitDecl(*I); + + return mlir::success(); +} + mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) { mlir::Location loc = getLoc(s.getSourceRange()); const Expr *rv = s.getRetValue(); diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h new file mode 100644 index 0000000000000..d29646983fd30 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -0,0 +1,125 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes implement wrappers around mlir::Value in order to fully +// represent the range of values for C L- and R- values. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_LIB_CIR_CIRGENVALUE_H +#define CLANG_LIB_CIR_CIRGENVALUE_H + +#include "Address.h" + +#include "clang/AST/CharUnits.h" +#include "clang/AST/Type.h" + +#include "llvm/ADT/PointerIntPair.h" + +#include "mlir/IR/Value.h" + +namespace clang::CIRGen { + +/// This trivial value class is used to represent the result of an +/// expression that is evaluated. It can be one of three things: either a +/// simple MLIR SSA value, a pair of SSA values for complex numbers, or the +/// address of an aggregate value in memory. +class RValue { + enum Flavor { Scalar, Complex, Aggregate }; + + // Stores first value and flavor. + llvm::PointerIntPair v1; + // Stores second value and volatility. + llvm::PointerIntPair, 1, bool> v2; + // Stores element type for aggregate values. + mlir::Type elementType; + +public: + bool isScalar() const { return v1.getInt() == Scalar; } + + /// Return the mlir::Value of this scalar value. + mlir::Value getScalarVal() const { + assert(isScalar() && "Not a scalar!"); + return v1.getPointer(); + } + + static RValue get(mlir::Value v) { + RValue er; + er.v1.setPointer(v); + er.v1.setInt(Scalar); + er.v2.setInt(false); + return er; + } +}; + +/// The source of the alignment of an l-value; an expression of +/// confidence in the alignment actually matching the estimate. +enum class AlignmentSource { + /// The l-value was an access to a declared entity or something + /// equivalently strong, like the address of an array allocated by a + /// language runtime. + Decl, + + /// The l-value was considered opaque, so the alignment was + /// determined from a type, but that type was an explicitly-aligned + /// typedef. + AttributedType, + + /// The l-value was considered opaque, so the alignment was + /// determined from a type. + Type +}; + +class LValue { + enum { + Simple, // This is a normal l-value, use getAddress(). 
+ VectorElt, // This is a vector element l-value (V[i]), use getVector* + BitField, // This is a bitfield l-value, use getBitfield*. + ExtVectorElt, // This is an extended vector subset, use getExtVectorComp + GlobalReg, // This is a register l-value, use getGlobalReg() + MatrixElt // This is a matrix element, use getVector* + } lvType; + clang::QualType type; + + mlir::Value v; + mlir::Type elementType; + + void initialize(clang::QualType type) { this->type = type; } + +public: + bool isSimple() const { return lvType == Simple; } + + // TODO: Add support for volatile + bool isVolatile() const { return false; } + + clang::QualType getType() const { return type; } + + mlir::Value getPointer() const { return v; } + + clang::CharUnits getAlignment() const { + // TODO: Handle alignment + return clang::CharUnits::One(); + } + + Address getAddress() const { + return Address(getPointer(), elementType, getAlignment()); + } + + static LValue makeAddr(Address address, clang::QualType t) { + LValue r; + r.lvType = Simple; + r.v = address.getPointer(); + r.elementType = address.getElementType(); + r.initialize(t); + return r; + } +}; + +} // namespace clang::CIRGen + +#endif // CLANG_LIB_CIR_CIRGENVALUE_H diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt index 5602efae1ba41..dbb6d9e7b3807 100644 --- a/clang/lib/CIR/CodeGen/CMakeLists.txt +++ b/clang/lib/CIR/CodeGen/CMakeLists.txt @@ -8,6 +8,8 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_clang_library(clangCIR CIRGenerator.cpp + CIRGenDecl.cpp + CIRGenExpr.cpp CIRGenExprScalar.cpp CIRGenFunction.cpp CIRGenModule.cpp diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 3f1be930d71e5..aa21edcb5e99d 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -117,6 +117,24 @@ static void printOmittedTerminatorRegion(mlir::OpAsmPrinter &printer, 
/*printBlockTerminators=*/!omitRegionTerm(region)); } +//===----------------------------------------------------------------------===// +// AllocaOp +//===----------------------------------------------------------------------===// + +void cir::AllocaOp::build(mlir::OpBuilder &odsBuilder, + mlir::OperationState &odsState, mlir::Type addr, + mlir::Type allocaType, llvm::StringRef name, + mlir::IntegerAttr alignment) { + odsState.addAttribute(getAllocaTypeAttrName(odsState.name), + mlir::TypeAttr::get(allocaType)); + odsState.addAttribute(getNameAttrName(odsState.name), + odsBuilder.getStringAttr(name)); + if (alignment) { + odsState.addAttribute(getAlignmentAttrName(odsState.name), alignment); + } + odsState.addTypes(addr); +} + //===----------------------------------------------------------------------===// // ConstantOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp new file mode 100644 index 0000000000000..af6b5e4fbd9f6 --- /dev/null +++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements MemorySlot-related interfaces for CIR dialect +// operations. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/CIR/Dialect/IR/CIRDialect.h" + +using namespace mlir; + +//===----------------------------------------------------------------------===// +// Interfaces for AllocaOp +//===----------------------------------------------------------------------===// + +llvm::SmallVector cir::AllocaOp::getPromotableSlots() { + return {MemorySlot{getResult(), getAllocaType()}}; +} + +Value cir::AllocaOp::getDefaultValue(const MemorySlot &slot, + OpBuilder &builder) { + return builder.create( + getLoc(), slot.elemType, builder.getAttr(slot.elemType)); +} + +void cir::AllocaOp::handleBlockArgument(const MemorySlot &slot, + BlockArgument argument, + OpBuilder &builder) {} + +std::optional +cir::AllocaOp::handlePromotionComplete(const MemorySlot &slot, + Value defaultValue, OpBuilder &builder) { + if (defaultValue && defaultValue.use_empty()) + defaultValue.getDefiningOp()->erase(); + this->erase(); + return std::nullopt; +} + +//===----------------------------------------------------------------------===// +// Interfaces for LoadOp +//===----------------------------------------------------------------------===// + +bool cir::LoadOp::loadsFrom(const MemorySlot &slot) { + return getAddr() == slot.ptr; +} + +bool cir::LoadOp::storesTo(const MemorySlot &slot) { return false; } + +Value cir::LoadOp::getStored(const MemorySlot &slot, OpBuilder &builder, + Value reachingDef, const DataLayout &dataLayout) { + llvm_unreachable("getStored should not be called on LoadOp"); +} + +bool cir::LoadOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + SmallVectorImpl &newBlockingUses, + const DataLayout &dataLayout) { + if (blockingUses.size() != 1) + return false; + Value blockingUse = (*blockingUses.begin())->get(); + return blockingUse == slot.ptr && getAddr() == slot.ptr && + getResult().getType() == slot.elemType; +} + +DeletionKind cir::LoadOp::removeBlockingUses( 
+ const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + OpBuilder &builder, Value reachingDefinition, + const DataLayout &dataLayout) { + getResult().replaceAllUsesWith(reachingDefinition); + return DeletionKind::Delete; +} diff --git a/clang/lib/CIR/Dialect/IR/CMakeLists.txt b/clang/lib/CIR/Dialect/IR/CMakeLists.txt index baf8bff185221..925af0d61c984 100644 --- a/clang/lib/CIR/Dialect/IR/CMakeLists.txt +++ b/clang/lib/CIR/Dialect/IR/CMakeLists.txt @@ -1,6 +1,7 @@ add_clang_library(MLIRCIR CIRAttrs.cpp CIRDialect.cpp + CIRMemorySlot.cpp CIRTypes.cpp DEPENDS diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 65fac01d58362..13bffd542e78e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10223,6 +10223,7 @@ llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) { default: llvm_unreachable("Invalid SVETypeFlag!"); + case SVETypeFlags::EltTyMFloat8: case SVETypeFlags::EltTyInt8: return Builder.getInt8Ty(); case SVETypeFlags::EltTyInt16: @@ -10651,7 +10652,7 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, unsigned IntrinsicID, bool IsZExtReturn) { QualType LangPTy = E->getArg(1)->getType(); - llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem( LangPTy->castAs()->getPointeeType()); // The vector type that is returned may be different from the @@ -10698,7 +10699,7 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, SmallVectorImpl &Ops, unsigned IntrinsicID) { QualType LangPTy = E->getArg(1)->getType(); - llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem( LangPTy->castAs()->getPointeeType()); // The vector type that is stored may be different from the diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 9abf2e8c9190d..a9795c2c0dc8f 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ 
b/clang/lib/CodeGen/CGCoroutine.cpp @@ -855,6 +855,20 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { // Create parameter copies. We do it before creating a promise, since an // evolution of coroutine TS may allow promise constructor to observe // parameter copies. + for (const ParmVarDecl *Parm : FnArgs) { + // If the original param is in an alloca, exclude it from the coroutine + // frame. The parameter copy will be part of the frame, but the original + // parameter memory should remain on the stack. This is necessary to + // ensure that parameters destroyed in callees, as with `trivial_abi` or + // in the MSVC C++ ABI, are appropriately destroyed after setting up the + // coroutine. + Address ParmAddr = GetAddrOfLocalVar(Parm); + if (auto *ParmAlloca = + dyn_cast(ParmAddr.getBasePointer())) { + ParmAlloca->setMetadata(llvm::LLVMContext::MD_coro_outside_frame, + llvm::MDNode::get(CGM.getLLVMContext(), {})); + } + } for (auto *PM : S.getParamMoves()) { EmitStmt(PM); ParamReplacer.addCopy(cast(PM)); @@ -942,9 +956,16 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { if (Stmt *Ret = S.getReturnStmt()) { // Since we already emitted the return value above, so we shouldn't // emit it again here. - if (GroManager.DirectEmit) + Expr *PreviousRetValue = nullptr; + if (GroManager.DirectEmit) { + PreviousRetValue = cast(Ret)->getRetValue(); cast(Ret)->setRetValue(nullptr); + } EmitStmt(Ret); + // Set the return value back. The code generator, as the AST **Consumer**, + // shouldn't change the AST. + if (PreviousRetValue) + cast(Ret)->setRetValue(PreviousRetValue); } // LLVM require the frontend to mark the coroutine. 
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 405242e97e75c..bd625052cb5ed 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -107,6 +107,9 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { MT->getNumRows() * MT->getNumColumns()); } + if (T->isMFloat8Type()) + return llvm::Type::getInt8Ty(getLLVMContext()); + llvm::Type *R = ConvertType(T); // Check for the boolean vector case. diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index bcd171724c41d..a84412bd5c045 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -5150,9 +5150,14 @@ WebAssemblyCXXABI::emitTerminateForUnexpectedException(CodeGenFunction &CGF, // Itanium ABI calls __clang_call_terminate(), which __cxa_begin_catch() on // the violating exception to mark it handled, but it is currently hard to do // with wasm EH instruction structure with catch/catch_all, we just call - // std::terminate and ignore the violating exception as in CGCXXABI. + // std::terminate and ignore the violating exception as in CGCXXABI in Wasm EH + // and call __clang_call_terminate only in Emscripten EH. // TODO Consider code transformation that makes calling __clang_call_terminate - // possible. + // in Wasm EH possible. 
+ if (Exn && !EHPersonality::get(CGF).isWasmPersonality()) { + assert(CGF.CGM.getLangOpts().CPlusPlus); + return CGF.EmitNounwindRuntimeCall(getClangCallTerminateFn(CGF.CGM), Exn); + } return CGCXXABI::emitTerminateForUnexpectedException(CGF, Exn); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 86db3f7678436..4ebbd241d2f0b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4250,10 +4250,18 @@ static bool RenderModulesOptions(Compilation &C, const Driver &D, if (Args.hasArg(options::OPT_fmodule_output_EQ)) Args.AddLastArg(CmdArgs, options::OPT_fmodule_output_EQ); - else + else { + if (Args.hasArg(options::OPT__precompile) && + (!Args.hasArg(options::OPT_o) || + Args.getLastArg(options::OPT_o)->getValue() == + getCXX20NamedModuleOutputPath(Args, Input.getBaseInput()))) { + D.Diag(diag::err_drv_reduced_module_output_overrided); + } + CmdArgs.push_back(Args.MakeArgString( "-fmodule-output=" + getCXX20NamedModuleOutputPath(Args, Input.getBaseInput()))); + } } // Noop if we see '-fmodules-reduced-bmi' with other translation diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index d49128c2b40f8..972dceb697a8b 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1921,9 +1921,9 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, NewIndent = Style.IndentWidth + std::min(State.Column, CurrentState.NestedBlockIndent); } else if (Current.is(tok::l_brace)) { - NewIndent = - CurrentState.LastSpace + Style.BracedInitializerIndentWidth.value_or( - Style.ContinuationIndentWidth); + const auto Width = Style.BracedInitializerIndentWidth; + NewIndent = CurrentState.LastSpace + + (Width < 0 ? 
Style.ContinuationIndentWidth : Width); } else { NewIndent = CurrentState.LastSpace + Style.ContinuationIndentWidth; } diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 68ef119fb4d65..92678a031178a 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1512,7 +1512,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.BinPackLongBracedList = true; LLVMStyle.BinPackParameters = FormatStyle::BPPS_BinPack; LLVMStyle.BitFieldColonSpacing = FormatStyle::BFCS_Both; - LLVMStyle.BracedInitializerIndentWidth = std::nullopt; + LLVMStyle.BracedInitializerIndentWidth = -1; LLVMStyle.BraceWrapping = {/*AfterCaseLabel=*/false, /*AfterClass=*/false, /*AfterControlStatement=*/FormatStyle::BWACS_Never, diff --git a/clang/lib/Headers/avx10_2convertintrin.h b/clang/lib/Headers/avx10_2convertintrin.h index 07722090c30ee..b425aa59251fa 100644 --- a/clang/lib/Headers/avx10_2convertintrin.h +++ b/clang/lib/Headers/avx10_2convertintrin.h @@ -24,24 +24,157 @@ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ __min_vector_width__(256))) +// clang-format off + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF i < 4 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4]) +/// FI +/// +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Lower 4 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. 
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1)); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. Merging mask \a __U is used to determine if given +/// element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// IF i < 4 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4]) +/// FI +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __W +/// A 128-bit vector of [8 x fp16]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. Zeroing mask \a __U is used to determine if given +/// element should be zeroed instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// IF i < 4 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4]) +/// FI +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then zero is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. 
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A, __m256 __B) { return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( @@ -49,6 +182,44 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A, _MM_FROUND_CUR_DIRECTION); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Merging mask \a __U is used to determine if given +/// element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) { return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( @@ -56,6 +227,42 @@ _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) { _MM_FROUND_CUR_DIRECTION); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. 
Zeroing mask \a __U is used to determine if given +/// element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then zero is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) { return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( @@ -63,32 +270,240 @@ _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm256_cvtx_round2ps_ph(A, B, R) \ +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Rounding mode \a __R needs to be provided. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \param __R +/// Rounding mode. 
Valid inputs are: _MM_FROUND_CUR_DIRECTION or +/// result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following: +/// _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF, +/// _MM_FROUND_TO_ZERO. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. +#define _mm256_cvtx_round2ps_ph(__A, __B, __R) \ ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)_mm256_undefined_ph(), \ - (__mmask16)(-1), (const int)(R))) - -#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R) \ + (__v8sf)(__A), (__v8sf)(__B), (__v16hf)_mm256_undefined_ph(), \ + (__mmask16)(-1), (const int)(__R))) + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Merging mask \a __U is used to determine if given +/// element should be taken from \a __W instead. Rounding mode \a __R needs to +/// be provided. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \param __R +/// Rounding mode. 
Valid inputs are: _MM_FROUND_CUR_DIRECTION or +/// result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following: +/// _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF, +/// _MM_FROUND_TO_ZERO. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. +#define _mm256_mask_cvtx_round2ps_ph(__W, __U, __A, __B, __R) \ ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)(W), (__mmask16)(U), (const int)(R))) - -#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R) \ + (__v8sf)(__A), (__v8sf)(__B), (__v16hf)(__W), (__mmask16)(__U), (const int)(__R))) + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Zeroing mask \a __U is used to determine if given +/// element should be zeroed instead. Rounding mode \a __R needs to be provided. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \param __R +/// Rounding mode. Valid inputs are: _MM_FROUND_CUR_DIRECTION or +/// result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following: +/// _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF, +/// _MM_FROUND_TO_ZERO. 
+/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then zero is taken instead. +#define _mm256_maskz_cvtx_round2ps_ph(__U, __A, __B, __R) \ ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()), \ - (__mmask16)(U), (const int)(R))) - -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_bf8(__m128i __A, - __m128h __B) { + (__v8sf)(__A), (__v8sf)(__B), (__v16hf)(_mm256_setzero_ph()), \ + (__mmask16)(__U), (const int)(__R))) + +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtbiasph_bf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtbiasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. 
Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( @@ -96,6 +511,29 @@ _mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { (__mmask8)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( @@ -103,12 +541,76 @@ _mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) { (__mmask16)-1); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_bf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. 
If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( @@ -116,18 +618,107 @@ _mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { (__mmask16)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiassph_bf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { - return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( - (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); -} - +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead.
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtbiassph_bf8(__m128i + __W, __mmask8 __U, __m128i __A, __m128h __B) { return + (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, + (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } + +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. 
Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( @@ -135,6 +726,30 @@ _mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { (__mmask8)__U); } + +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( @@ -142,12 +757,76 @@ _mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) { (__mmask16)-1); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_bf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. 
Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( @@ -155,18 +834,108 @@ _mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_hf8(__m128i __A, - __m128h __B) { +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtbiasph_hf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtbiasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. 
Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( @@ -174,6 +943,29 @@ _mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { (__mmask8)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( @@ -181,12 +973,76 @@ _mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) { (__mmask16)-1); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead.
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_hf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A.
If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( @@ -194,18 +1050,108 @@ _mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { (__mmask16)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiassph_hf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead.
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// An 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __U +/// An 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. 
Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( @@ -213,6 +1159,29 @@ _mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { (__mmask8)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( @@ -220,12 +1189,76 @@ _mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) { (__mmask16)-1); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_hf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. 
Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( @@ -233,17 +1266,119 @@ _mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { (__mmask16)__U); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_bf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), (__v8hf)(__B)); -} - + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), + (__v8hf)(__B)); +} + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvt2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. 
Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( @@ -251,18 +1386,119 @@ _mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { (__v16qi)(__m128i)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_bf8(__m256h __A, - __m256h __B) { - return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A), - (__v16hf)(__B)); -} - +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +_mm256_cvt2ph_bf8(__m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A), + (__v16hf)(__B)); +} + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. 
+/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x bf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvt2ph_bf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( (__mmask32)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. 
+/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( @@ -270,17 +1506,120 @@ _mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_bf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), (__v8hf)(__B)); -} - +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. 
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvts2ph_bf8(__m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), + (__v8hf)(__B)); +} + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( @@ -288,18 +1627,120 @@ _mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { (__v16qi)(__m128i)_mm_setzero_si128()); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. 
Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvts2ph_bf8(__m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A), - (__v16hf)(__B)); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + (__v16hf)(__B)); +} + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x bf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. 
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvts2ph_bf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( (__mmask32)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( @@ -307,17 +1748,119 @@ _mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { (__v32qi)(__m256i)_mm256_setzero_si256()); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_hf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), (__v8hf)(__B)); -} - + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), + (__v8hf)(__B)); +} + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. 
Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvt2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( @@ -325,18 +1868,119 @@ _mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { (__v16qi)(__m128i)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_hf8(__m256h __A, - __m256h __B) { - return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A), - (__v16hf)(__B)); -} - +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +_mm256_cvt2ph_hf8(__m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A), + (__v16hf)(__B)); +} + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x hf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvt2ph_hf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( (__mmask32)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. 
+/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( @@ -344,17 +1988,120 @@ _mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_hf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), (__v8hf)(__B)); -} - +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvts2ph_hf8(__m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), + (__v8hf)(__B)); +} + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. 
+/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( @@ -362,18 +2109,120 @@ _mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { (__v16qi)(__m128i)_mm_setzero_si128()); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. 
Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvts2ph_hf8(__m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A), - (__v16hf)(__B)); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + (__v16hf)(__B)); +} + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x hf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. 
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvts2ph_hf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( (__mmask32)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( @@ -381,207 +2230,1161 @@ _mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { (__v32qi)(__m256i)_mm256_setzero_si256()); } +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is exact. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvthf8_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __W +/// A 128-bit vector of [8 x fp16]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvthf8_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. 
Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __U +/// An 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvthf8_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (__mmask8)__U); } +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is exact. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvthf8_ph(__m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (__mmask16)-1); } +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead.
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvthf8_ph(__m256h __W, __mmask16 __U, __m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)__W, (__mmask16)__U); } +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead.
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvthf8_ph(__mmask16 __U, __m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_bf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. 
If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_bf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_bf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. 
Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_bf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask.
+/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_bf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_bf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. 
+/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsph_bf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_bf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Results are saturated. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtsph_bf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Results are saturated. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Results are saturated.
+/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsph_bf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_hf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed.
Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_hf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_hf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_hf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead.
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_hf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_hf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsph_hf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_hf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtsph_hf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Results are saturated. 
+/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Results are saturated. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead.
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsph_hf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is exact. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtbf8_ph(__m128i __A) { return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A), 8)); } +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __W +/// A 128-bit vector of [8 x fp16]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. 
static __inline__ __m128h __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) { +_mm_mask_cvtbf8_ph(__m128h __W, __mmask8 __U, __m128i __A) { return _mm_castsi128_ph( - _mm_mask_slli_epi16((__m128i)__S, __U, _mm_cvtepi8_epi16(__A), 8)); -} - + _mm_mask_slli_epi16((__m128i)__W, __U, _mm_cvtepi8_epi16(__A), 8)); +} + +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbf8_ph(__mmask8 __U, __m128i __A) { return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U, __A), 8)); } +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is exact. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A.
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtbf8_ph(__m128i __A) { return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A), 8)); } +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) { +_mm256_mask_cvtbf8_ph(__m256h __W, __mmask16 __U, __m128i __A) { return _mm256_castsi256_ph( - _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8)); -} - + _mm256_mask_slli_epi16((__m256i)__W, __U, _mm256_cvtepi8_epi16(__A), 8)); +} + +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction.
+/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbf8_ph(__mmask16 __U, __m128i __A) { return _mm256_castsi256_ph( _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8)); } +// clang-format on + #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h index a14c39f9f7313..338ea51ce8863 100644 --- a/clang/lib/Headers/vecintrin.h +++ b/clang/lib/Headers/vecintrin.h @@ -7,6 +7,9 @@ *===-----------------------------------------------------------------------=== */ +#ifndef _VECINTRIN_H +#define _VECINTRIN_H + #if defined(__s390x__) && defined(__VEC__) #define __ATTRS_ai __attribute__((__always_inline__)) @@ -12861,3 +12864,5 @@ vec_search_string_until_zero_cc(__vector unsigned int __a, #error "Use -fzvector to enable vector extensions" #endif + +#endif /* _VECINTRIN_H */ diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 42e6aac681c1c..b791c5d5e3019 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2883,6 +2883,15 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( /*ReadDirectiveWithinMetadirective=*/true); break; } + // If no match is found and no otherwise clause is present, skip + // OMP5.2 Chapter 7.4: If no otherwise clause is specified the effect is as + // if one was specified without an associated directive variant.
+ if (BestIdx == -1 && Idx == 1) { + assert(Tok.is(tok::annot_pragma_openmp_end) && + "Expecting the end of the pragma here"); + ConsumeAnnotationToken(); + return StmtEmpty(); + } break; } case OMPD_threadprivate: { diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index c699e92985156..b3fba097999f5 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1798,6 +1798,47 @@ class DeferredDiagnosticsEmitter Inherited::visitUsedDecl(Loc, D); } + // Visitor member and parent dtors called by this dtor. + void VisitCalledDestructors(CXXDestructorDecl *DD) { + const CXXRecordDecl *RD = DD->getParent(); + + // Visit the dtors of all members + for (const FieldDecl *FD : RD->fields()) { + QualType FT = FD->getType(); + if (const auto *RT = FT->getAs()) + if (const auto *ClassDecl = dyn_cast(RT->getDecl())) + if (ClassDecl->hasDefinition()) + if (CXXDestructorDecl *MemberDtor = ClassDecl->getDestructor()) + asImpl().visitUsedDecl(MemberDtor->getLocation(), MemberDtor); + } + + // Also visit base class dtors + for (const auto &Base : RD->bases()) { + QualType BaseType = Base.getType(); + if (const auto *RT = BaseType->getAs()) + if (const auto *BaseDecl = dyn_cast(RT->getDecl())) + if (BaseDecl->hasDefinition()) + if (CXXDestructorDecl *BaseDtor = BaseDecl->getDestructor()) + asImpl().visitUsedDecl(BaseDtor->getLocation(), BaseDtor); + } + } + + void VisitDeclStmt(DeclStmt *DS) { + // Visit dtors called by variables that need destruction + for (auto *D : DS->decls()) + if (auto *VD = dyn_cast(D)) + if (VD->isThisDeclarationADefinition() && + VD->needsDestruction(S.Context)) { + QualType VT = VD->getType(); + if (const auto *RT = VT->getAs()) + if (const auto *ClassDecl = dyn_cast(RT->getDecl())) + if (ClassDecl->hasDefinition()) + if (CXXDestructorDecl *Dtor = ClassDecl->getDestructor()) + asImpl().visitUsedDecl(Dtor->getLocation(), Dtor); + } + + Inherited::VisitDeclStmt(DS); + } void checkVar(VarDecl *VD) { assert(VD->isFileVarDecl() && "Should 
only check file-scope variables"); @@ -1839,6 +1880,8 @@ class DeferredDiagnosticsEmitter if (auto *S = FD->getBody()) { this->Visit(S); } + if (CXXDestructorDecl *Dtor = dyn_cast(FD)) + asImpl().VisitCalledDestructors(Dtor); UsePath.pop_back(); InUsePath.erase(FD); } diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 0e1bf727d72d2..473956c37bb51 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -372,6 +372,21 @@ bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, CXXMethodDecl *MemberDecl, bool ConstRHS, bool Diagnose) { + // If MemberDecl is virtual destructor of an explicit template class + // instantiation, it must be emitted, therefore it needs to be inferred + // conservatively by ignoring implicit host/device attrs of member and parent + // dtors called by it. Also, it needs to be checked by deferred diag visitor. + bool IsExpVDtor = false; + if (isa(MemberDecl) && MemberDecl->isVirtual()) { + if (auto *Spec = dyn_cast(ClassDecl)) { + TemplateSpecializationKind TSK = Spec->getTemplateSpecializationKind(); + IsExpVDtor = TSK == TSK_ExplicitInstantiationDeclaration || + TSK == TSK_ExplicitInstantiationDefinition; + } + } + if (IsExpVDtor) + SemaRef.DeclsToCheckForDeferredDiags.insert(MemberDecl); + // If the defaulted special member is defined lexically outside of its // owning class, or the special member already has explicit device or host // attributes, do not infer. diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 285bd27a35a76..fe313c62ff846 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -13427,9 +13427,13 @@ bool Sema::GloballyUniqueObjectMightBeAccidentallyDuplicated( FunDcl->getTemplateSpecializationKind() != TSK_Undeclared; } - // Non-inline functions/variables can only legally appear in one TU, - // unless they were part of a template.
- if (!TargetIsInline && !TargetWasTemplated) + // Non-inline functions/variables can only legally appear in one TU + // unless they were part of a template. Unfortunately, making complex + // template instantiations visible is infeasible in practice, since + // everything the template depends on also has to be visible. To avoid + // giving impractical-to-fix warnings, don't warn if we're inside + // something that was templated, even on inline stuff. + if (!TargetIsInline || TargetWasTemplated) return false; // If the object isn't hidden, the dynamic linker will prevent duplication. @@ -13469,8 +13473,8 @@ void Sema::DiagnoseUniqueObjectDuplication(const VarDecl *VD) { // FIXME: Windows uses dllexport/dllimport instead of visibility, and we don't // handle that yet. Disable the warning on Windows for now. - // Don't diagnose if we're inside a template; - // we'll diagnose during instantiation instead. + // Don't diagnose if we're inside a template, because it's not practical to + // fix the warning in most cases. if (!Context.getTargetInfo().shouldDLLImportComdatSymbols() && !VD->isTemplated() && GloballyUniqueObjectMightBeAccidentallyDuplicated(VD)) { @@ -20469,6 +20473,21 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD, if (IsEmittedForExternalSymbol()) return FunctionEmissionStatus::Emitted; + + // If FD is a virtual destructor of an explicit instantiation + // of a template class, return Emitted. 
+ if (auto *Destructor = dyn_cast(FD)) { + if (Destructor->isVirtual()) { + if (auto *Spec = dyn_cast( + Destructor->getParent())) { + TemplateSpecializationKind TSK = + Spec->getTemplateSpecializationKind(); + if (TSK == TSK_ExplicitInstantiationDeclaration || + TSK == TSK_ExplicitInstantiationDefinition) + return FunctionEmissionStatus::Emitted; + } + } + } } // Otherwise, the function is known-emitted if it's in our set of diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 283a9801fc707..ffc3ac1b65854 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1446,18 +1446,20 @@ static bool DiagnoseLocalRegisterBinding(Sema &S, SourceLocation &ArgLoc, Ty = Ty->getArrayElementTypeNoTypeQual(); // Basic types - if (Ty->isArithmeticType()) { + if (Ty->isArithmeticType() || Ty->isVectorType()) { bool DeclaredInCOrTBuffer = isa(D->getDeclContext()); if (SpecifiedSpace && !DeclaredInCOrTBuffer) S.Diag(ArgLoc, diag::err_hlsl_space_on_global_constant); - if (!DeclaredInCOrTBuffer && - (Ty->isIntegralType(S.getASTContext()) || Ty->isFloatingType())) { - // Default Globals + if (!DeclaredInCOrTBuffer && (Ty->isIntegralType(S.getASTContext()) || + Ty->isFloatingType() || Ty->isVectorType())) { + // Register annotation on default constant buffer declaration ($Globals) if (RegType == RegisterType::CBuffer) S.Diag(ArgLoc, diag::warn_hlsl_deprecated_register_type_b); else if (RegType != RegisterType::C) S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << RegTypeNum; + else + return true; } else { if (RegType == RegisterType::C) S.Diag(ArgLoc, diag::warn_hlsl_register_type_c_packoffset); diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 925af06894f72..86f5a5c1d4434 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -4261,7 +4261,7 @@ static bool TryInitializerListConstruction(Sema &S, QualType ArrayType = S.Context.getConstantArrayType( E.withConst(), 
llvm::APInt(S.Context.getTypeSize(S.Context.getSizeType()), - List->getNumInits()), + List->getNumInitsWithEmbedExpanded()), nullptr, clang::ArraySizeModifier::Normal, 0); InitializedEntity HiddenArray = InitializedEntity::InitializeTemporary(ArrayType); diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 08586b4908dd4..c344b6fff40c6 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -5710,12 +5710,14 @@ TryListConversion(Sema &S, InitListExpr *From, QualType ToType, // - if the initializer list has one element that is not itself an // initializer list, the implicit conversion sequence is the one // required to convert the element to the parameter type. + // Bail out on EmbedExpr as well since we never create EmbedExpr for a + // single integer. unsigned NumInits = From->getNumInits(); - if (NumInits == 1 && !isa(From->getInit(0))) - Result = TryCopyInitialization(S, From->getInit(0), ToType, - SuppressUserConversions, - InOverloadResolution, - AllowObjCWritebackConversion); + if (NumInits == 1 && !isa(From->getInit(0)) && + !isa(From->getInit(0))) + Result = TryCopyInitialization( + S, From->getInit(0), ToType, SuppressUserConversions, + InOverloadResolution, AllowObjCWritebackConversion); // - if the initializer list has no elements, the implicit conversion // sequence is the identity conversion. else if (NumInits == 0) { diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 627cd82ed1c77..dbd73ead8a63f 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -4506,7 +4506,8 @@ static TemplateDeductionResult DeduceFromInitializerList( // C++ [temp.deduct.type]p13: // The type of N in the type T[N] is std::size_t. 
QualType T = S.Context.getSizeType(); - llvm::APInt Size(S.Context.getIntWidth(T), ILE->getNumInits()); + llvm::APInt Size(S.Context.getIntWidth(T), + ILE->getNumInitsWithEmbedExpanded()); if (auto Result = DeduceNonTypeTemplateArgument( S, TemplateParams, NTTP, llvm::APSInt(Size), T, /*ArrayBound=*/true, Info, /*PartialOrdering=*/false, Deduced, diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 620fc117c6789..79cb5a07701fd 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -2533,6 +2533,15 @@ RegionStoreManager::bind(LimitedRegionBindingsConstRef B, Loc L, SVal V) { const MemRegion *R = MemRegVal->getRegion(); + // Binding directly to a symbolic region should be treated as binding + // to element 0. + if (const auto *SymReg = dyn_cast(R)) { + QualType Ty = SymReg->getPointeeStaticType(); + if (Ty->isVoidType()) + Ty = StateMgr.getContext().CharTy; + R = GetElementZeroRegion(SymReg, Ty); + } + // Check if the region is a struct region. if (const TypedValueRegion* TR = dyn_cast(R)) { QualType Ty = TR->getValueType(); @@ -2546,15 +2555,6 @@ RegionStoreManager::bind(LimitedRegionBindingsConstRef B, Loc L, SVal V) { return bindAggregate(B, TR, V); } - // Binding directly to a symbolic region should be treated as binding - // to element 0. 
- if (const auto *SymReg = dyn_cast(R)) { - QualType Ty = SymReg->getPointeeStaticType(); - if (Ty->isVoidType()) - Ty = StateMgr.getContext().CharTy; - R = GetElementZeroRegion(SymReg, Ty); - } - assert((!isa(R) || !B.lookup(R)) && "'this' pointer is not an l-value and is not assignable"); @@ -2570,6 +2570,9 @@ RegionStoreManager::bind(LimitedRegionBindingsConstRef B, Loc L, SVal V) { LimitedRegionBindingsRef RegionStoreManager::setImplicitDefaultValue(LimitedRegionBindingsConstRef B, const MemRegion *R, QualType T) { + if (B.hasExhaustedBindingLimit()) + return B; + SVal V; if (Loc::isLocType(T)) @@ -2596,6 +2599,8 @@ RegionStoreManager::setImplicitDefaultValue(LimitedRegionBindingsConstRef B, std::optional RegionStoreManager::tryBindSmallArray( LimitedRegionBindingsConstRef B, const TypedValueRegion *R, const ArrayType *AT, nonloc::LazyCompoundVal LCV) { + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(LCV); auto CAT = dyn_cast(AT); @@ -2632,6 +2637,8 @@ RegionStoreManager::bindArray(LimitedRegionBindingsConstRef B, const TypedValueRegion *R, SVal Init) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindArray", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(Init); const ArrayType *AT =cast(Ctx.getCanonicalType(R->getValueType())); QualType ElementTy = AT->getElementType(); @@ -2698,6 +2705,9 @@ RegionStoreManager::bindVector(LimitedRegionBindingsConstRef B, const TypedValueRegion *R, SVal V) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindVector", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(V); + QualType T = R->getValueType(); const VectorType *VT = T->castAs(); // Use castAs for typedefs. 
@@ -2722,6 +2732,9 @@ RegionStoreManager::bindVector(LimitedRegionBindingsConstRef B, if (VI == VE) break; + if (NewB.hasExhaustedBindingLimit()) + return NewB.withValuesEscaped(VI, VE); + NonLoc Idx = svalBuilder.makeArrayIndex(index); const ElementRegion *ER = MRMgr.getElementRegion(ElemType, Idx, R, Ctx); @@ -2758,6 +2771,9 @@ RegionStoreManager::getUniqueDefaultBinding(nonloc::LazyCompoundVal LCV) const { std::optional RegionStoreManager::tryBindSmallStruct( LimitedRegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD, nonloc::LazyCompoundVal LCV) { + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(LCV); + // If we try to copy a Conjured value representing the value of the whole // struct, don't try to element-wise copy each field. // That would unnecessarily bind Derived symbols slicing off the subregion for @@ -2822,6 +2838,9 @@ RegionStoreManager::bindStruct(LimitedRegionBindingsConstRef B, const TypedValueRegion *R, SVal V) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindStruct", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(V); + QualType T = R->getValueType(); assert(T->isStructureOrClassType()); @@ -2931,6 +2950,9 @@ RegionStoreManager::bindStruct(LimitedRegionBindingsConstRef B, ++VI; } + if (NewB.hasExhaustedBindingLimit()) + return NewB.withValuesEscaped(VI, VE); + // There may be fewer values in the initialize list than the fields of struct. if (FI != FE) { NewB = NewB.addBinding(R, BindingKey::Default, @@ -2945,6 +2967,9 @@ RegionStoreManager::bindAggregate(LimitedRegionBindingsConstRef B, const TypedRegion *R, SVal Val) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindAggregate", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(Val); + // Remove the old bindings, using 'R' as the root of all regions // we will invalidate. Then add the new binding. 
return removeSubRegionBindings(B, R).addBinding(R, BindingKey::Default, Val); diff --git a/clang/test/AST/HLSL/resource_binding_attr.hlsl b/clang/test/AST/HLSL/resource_binding_attr.hlsl index 6fac903f75e18..26ab85b7d18a5 100644 --- a/clang/test/AST/HLSL/resource_binding_attr.hlsl +++ b/clang/test/AST/HLSL/resource_binding_attr.hlsl @@ -1,41 +1,56 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -ast-dump -o - %s | FileCheck %s -// CHECK:HLSLBufferDecl 0x[[CB:[0-9a-f]+]] {{.*}} line:8:9 cbuffer CB -// CHECK-NEXT:HLSLResourceClassAttr 0x[[CB:[0-9a-f]+]] {{.*}} Implicit CBuffer -// CHECK-NEXT:HLSLResourceAttr 0x[[CB:[0-9a-f]+]] {{.*}} Implicit CBuffer -// CHECK-NEXT:HLSLResourceBindingAttr 0x{{[0-9a-f]+}} "b3" "space2" -// CHECK-NEXT:VarDecl 0x[[A:[0-9a-f]+]] {{.*}} col:9 used a 'hlsl_constant float' +// CHECK: HLSLBufferDecl {{.*}} line:[[# @LINE + 5]]:9 cbuffer CB +// CHECK-NEXT: HLSLResourceClassAttr {{.*}} Implicit CBuffer +// CHECK-NEXT: HLSLResourceAttr {{.*}} Implicit CBuffer +// CHECK-NEXT: HLSLResourceBindingAttr {{.*}} "b3" "space2" +// CHECK-NEXT: VarDecl {{.*}} used a 'hlsl_constant float' cbuffer CB : register(b3, space2) { float a; } -// CHECK:HLSLBufferDecl 0x[[TB:[0-9a-f]+]] {{.*}} line:17:9 tbuffer TB -// CHECK-NEXT:HLSLResourceClassAttr 0x[[CB:[0-9a-f]+]] {{.*}} Implicit SRV -// CHECK-NEXT:HLSLResourceAttr 0x[[CB:[0-9a-f]+]] {{.*}} Implicit TBuffer -// CHECK-NEXT:HLSLResourceBindingAttr 0x{{[0-9a-f]+}} "t2" "space1" -// CHECK-NEXT:VarDecl 0x[[B:[0-9a-f]+]] {{.*}} col:9 used b 'hlsl_constant float' +// CHECK: HLSLBufferDecl {{.*}} line:[[# @LINE + 5]]:9 tbuffer TB +// CHECK-NEXT: HLSLResourceClassAttr {{.*}} Implicit SRV +// CHECK-NEXT: HLSLResourceAttr {{.*}} Implicit TBuffer +// CHECK-NEXT: HLSLResourceBindingAttr {{.*}} "t2" "space1" +// CHECK-NEXT: VarDecl {{.*}} used b 'hlsl_constant float' tbuffer TB : 
register(t2, space1) { float b; } -float foo() { -// CHECK: BinaryOperator 0x{{[0-9a-f]+}} 'float' '+' -// CHECK-NEXT: ImplicitCastExpr 0x{{[0-9a-f]+}} 'float' -// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} 'hlsl_constant float' lvalue Var 0x[[A]] 'a' 'hlsl_constant float' -// CHECK-NEXT: ImplicitCastExpr 0x{{[0-9a-f]+}} 'float' -// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} 'hlsl_constant float' lvalue Var 0x[[B]] 'b' 'hlsl_constant float' +export float foo() { return a + b; } -// CHECK: VarDecl 0x{{[0-9a-f]+}} <{{.*}}> col:17 UAV 'RWBuffer':'hlsl::RWBuffer' callinit -// CHECK-NEXT:-CXXConstructExpr 0x{{[0-9a-f]+}} 'RWBuffer':'hlsl::RWBuffer' 'void ()' -// CHECK-NEXT:-HLSLResourceBindingAttr 0x{{[0-9a-f]+}} "u3" "space0" +// CHECK: VarDecl {{.*}} UAV 'RWBuffer':'hlsl::RWBuffer' +// CHECK: HLSLResourceBindingAttr {{.*}} "u3" "space0" RWBuffer UAV : register(u3); -// CHECK: -VarDecl 0x{{[0-9a-f]+}} <{{.*}}> col:17 UAV1 'RWBuffer':'hlsl::RWBuffer' callinit -// CHECK-NEXT:-CXXConstructExpr 0x{{[0-9a-f]+}} 'RWBuffer':'hlsl::RWBuffer' 'void ()' -// CHECK-NEXT:-HLSLResourceBindingAttr 0x{{[0-9a-f]+}} "u2" "space0" -// CHECK-NEXT:-VarDecl 0x{{[0-9a-f]+}} col:38 UAV2 'RWBuffer':'hlsl::RWBuffer' callinit -// CHECK-NEXT:-CXXConstructExpr 0x{{[0-9a-f]+}} 'RWBuffer':'hlsl::RWBuffer' 'void ()' -// CHECK-NEXT:-HLSLResourceBindingAttr 0x{{[0-9a-f]+}} "u4" "space0" +// CHECK: VarDecl {{.*}} UAV1 'RWBuffer':'hlsl::RWBuffer' +// CHECK: HLSLResourceBindingAttr {{.*}} "u2" "space0" +// CHECK: VarDecl {{.*}} UAV2 'RWBuffer':'hlsl::RWBuffer' +// CHECK: HLSLResourceBindingAttr {{.*}} "u4" "space0" RWBuffer UAV1 : register(u2), UAV2 : register(u4); + +// +// Default constants ($Globals) layout annotations + +// CHECK: VarDecl {{.*}} f 'hlsl_constant float' +// CHECK: HLSLResourceBindingAttr {{.*}} "c5" "space0" +float f : register(c5); + +// CHECK: VarDecl {{.*}} intv 'hlsl_constant int4':'vector' +// CHECK: HLSLResourceBindingAttr {{.*}} "c2" "space0" +int4 intv : register(c2); + +// CHECK: 
VarDecl {{.*}} dar 'hlsl_constant double[5]' +// CHECK: HLSLResourceBindingAttr {{.*}} "c3" "space0" +double dar[5] : register(c3); + +struct S { + int a; +}; + +// CHECK: VarDecl {{.*}} s 'hlsl_constant S' +// CHECK: HLSLResourceBindingAttr {{.*}} "c10" "space0 +S s : register(c10); diff --git a/clang/test/Analysis/initializer.cpp b/clang/test/Analysis/initializer.cpp index f50afff25d245..713e121168571 100644 --- a/clang/test/Analysis/initializer.cpp +++ b/clang/test/Analysis/initializer.cpp @@ -254,6 +254,224 @@ void foo() { } } // namespace CXX17_aggregate_construction +namespace newexpr_init_list_initialization { +template +void escape(FirstT first, Rest... args); + +struct S { + int foo; + int bar; +}; +void none_designated() { + S *s = new S{13,1}; + clang_analyzer_eval(13 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(1 == s->bar); // expected-warning{{TRUE}} + delete s; +} +void none_designated_swapped() { + S *s = new S{1,13}; + clang_analyzer_eval(1 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(13 == s->bar); // expected-warning{{TRUE}} + delete s; +} +void one_designated_one_not() { + S *s = new S{ 1, .bar = 13 }; + clang_analyzer_eval(1 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(13 == s->bar); // expected-warning{{TRUE}} + delete s; +} +void all_designated() { + S *s = new S{ + .foo = 13, + .bar = 1, + }; + clang_analyzer_eval(13 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(1 == s->bar); // expected-warning{{TRUE}} + delete s; +} +void non_designated_array_of_aggr_struct() { + S *s = new S[2] { {1, 2}, {3, 4} }; + clang_analyzer_eval(1 == s[0].foo); // expected-warning{{TRUE}} + clang_analyzer_eval(2 == s[0].bar); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == s[1].foo); // expected-warning{{TRUE}} + clang_analyzer_eval(4 == s[1].bar); // expected-warning{{TRUE}} + delete[] s; +} + +struct WithGaps { + int foo; + int bar; + int baz; +}; +void 
out_of_order_designated_initializers_with_gaps() { + WithGaps *s = new WithGaps{ + .foo = 13, + .baz = 1, + }; + clang_analyzer_eval(13 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == s->bar); // expected-warning{{TRUE}} + clang_analyzer_eval(1 == s->baz); // expected-warning{{TRUE}} + delete s; +} + +// https://eel.is/c++draft/dcl.init.aggr#note-6: +// Static data members, non-static data members of anonymous +// union members, and unnamed bit-fields are not considered +// elements of the aggregate. +struct NonConsideredFields { + int i; + static int s; + int j; + int :17; + int k; +}; +void considered_fields_initd() { + auto S = new NonConsideredFields { 1, 2, 3 }; + clang_analyzer_eval(1 == S->i); // expected-warning{{TRUE}} + clang_analyzer_eval(2 == S->j); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == S->k); // expected-warning{{TRUE}} + delete S; +} + +#if __cplusplus >= 201703L +enum Enum : int { +}; +void list_init_enum() { + Enum *E = new Enum{53}; + clang_analyzer_eval(53 == *E); // expected-warning{{TRUE}} + delete E; +} +#endif // __cplusplus >= 201703L + +class PubClass { +public: + int foo; + int bar; +}; +void public_class_designated_initializers() { + S *s = new S{ + .foo = 13, + .bar = 1, + }; + clang_analyzer_eval(13 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(1 == s->bar); // expected-warning{{TRUE}} + delete s; +} + +union UnionTestTy { + int x; + char y; +}; +void new_expr_aggr_init_union_no_designator() { + UnionTestTy *u = new UnionTestTy{}; + clang_analyzer_eval(0 == u->x); // expected-warning{{UNKNOWN}} FIXME: should be TRUE + clang_analyzer_eval(u->y); // expected-warning{{UNKNOWN}} FIXME: should be undefined, warning + delete u; +} +void new_expr_aggr_init_union_designated_first_field() { + UnionTestTy *u = new UnionTestTy{ .x = 14 }; + clang_analyzer_eval(14 == u->x); // expected-warning{{UNKNOWN}} FIXME: should be TRUE + clang_analyzer_eval(u->y); // expected-warning{{UNKNOWN}} FIXME: should 
be undefined, warning + delete u; +} +void new_expr_aggr_init_union_designated_non_first_field() { + UnionTestTy *u = new UnionTestTy{ .y = 3 }; + clang_analyzer_eval(3 == u->y); // expected-warning{{UNKNOWN}} FIXME: should be TRUE + clang_analyzer_eval(u->x); // expected-warning{{UNKNOWN}} FIXME: should be undefined, warning + delete u; +} + +union UnionTestTyWithDefaultMemberInit { + int x; + char y = 14; +}; +void union_with_default_member_init_empty_init_list() { + auto U = new UnionTestTyWithDefaultMemberInit{}; + // clang_analyzer_eval(14 == U->y); // FIXME: Should be true + clang_analyzer_eval(U->x); // expected-warning{{UNKNOWN}} FIXME: should be undefined, warning + delete U; +} + +struct Inner { + int bar; +}; +struct Nested { + int foo; + Inner inner; + int baz; +}; +void nested_aggregates() { + auto N = new Nested{}; + clang_analyzer_eval(0 == N->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N->baz); // expected-warning{{TRUE}} + + auto N1 = new Nested{1}; + clang_analyzer_eval(1 == N1->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N1->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N1->baz); // expected-warning{{TRUE}} + + auto N2 = new Nested{.baz = 14}; + clang_analyzer_eval(0 == N2->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N2->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(14 == N2->baz); // expected-warning{{TRUE}} + + auto N3 = new Nested{1,2,3}; + clang_analyzer_eval(1 == N3->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(2 == N3->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == N3->baz); // expected-warning{{TRUE}} + + auto N4 = new Nested{1, {}, 3}; + clang_analyzer_eval(1 == N4->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N4->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == N4->baz); // expected-warning{{TRUE}} + + auto N5 = new 
Nested{{},{},{}}; + clang_analyzer_eval(0 == N5->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N5->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N5->baz); // expected-warning{{TRUE}} + + auto N6 = new Nested{1, {.bar = 2}, 3}; + clang_analyzer_eval(1 == N6->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(2 == N6->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == N6->baz); // expected-warning{{TRUE}} + + auto N7 = new Nested{1, {2}, 3}; + clang_analyzer_eval(1 == N7->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(2 == N7->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == N7->baz); // expected-warning{{TRUE}} + + escape(N,N1,N2,N3,N4,N5,N6,N7); +} +} // namespace newexpr_init_list_initialization + +namespace placement_new_initializer_list_arg { +struct S { + int x; +}; +void aggregate_struct() { + S s; + S *s_ptr = new (&s) S{1}; + clang_analyzer_eval(1 == s_ptr->x); // expected-warning{{TRUE}} + + S vi; + S *vi_ptr = new (&vi) S{}; + clang_analyzer_eval(0 == vi_ptr->x); // expected-warning{{TRUE}} + + S di; + S *di_ptr = new (&di) S; + int z = di_ptr->x + 1; // expected-warning{{The left operand of '+' is a garbage value}} +} +void initialize_non_zeroth_element(S arr[2]) { + S *s = new (&arr[1]) S{1}; + clang_analyzer_eval(1 == s->x); // expected-warning{{TRUE}} +} +void initialize_non_zeroth_argument_pointers(S *arr[2]) { + arr[1] = new (arr[1]) S{1}; + clang_analyzer_eval(1 == arr[1]->x); // expected-warning{{TRUE}} +} +} // namespace placement_new_initializer_list_arg + namespace CXX17_transparent_init_list_exprs { class A {}; diff --git a/clang/test/Analysis/new-user-defined.cpp b/clang/test/Analysis/new-user-defined.cpp new file mode 100644 index 0000000000000..8987ac078bf2c --- /dev/null +++ b/clang/test/Analysis/new-user-defined.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_analyze_cc1 -verify %s\ +// RUN: -analyzer-checker=core,debug.ExprInspection + +void 
clang_analyzer_eval(bool); + +using size_t = decltype(sizeof(int)); + +template +void escape(FirstT first, Rest... args); + +namespace CustomClassType { +struct S { + int x; + static void* operator new(size_t size) { + return ::operator new(size); + } +}; +void F() { + S *s = new S; + clang_analyzer_eval(s->x); // expected-warning{{UNKNOWN}} FIXME: should be an undefined warning + + S *s2 = new S{}; + clang_analyzer_eval(0 == s2->x); // expected-warning{{TRUE}} + + S *s3 = new S{1}; + clang_analyzer_eval(1 == s3->x); // expected-warning{{TRUE}} + + escape(s, s2, s3); +} +} // namespace CustomClassType diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c index 7d6cb4ecf1b24..2174dafc0021b 100644 --- a/clang/test/Analysis/out-of-bounds.c +++ b/clang/test/Analysis/out-of-bounds.c @@ -217,3 +217,53 @@ int test_negative_offset_with_unsigned_idx(void) { unsigned idx = 2u; return p[idx]; // expected-warning {{Out of bound access to memory preceding}} } + +struct three_words { int c[3]; }; +struct seven_words { int c[7]; }; +void partially_in_bounds(void) { + struct seven_words c; + struct three_words a, *p = (struct three_words *)&c; + p[0] = a; // no-warning + p[1] = a; // no-warning + p[2] = a; // should warn + // FIXME: This is an overflow, but currently security.ArrayBound only checks + // that the _beginning_ of the accessed element is within bounds. +} + +void vla(int a) { + if (a == 5) { + int x[a]; + x[4] = 4; // no-warning + x[5] = 5; // expected-warning{{Out of bound access}} + } +} + +void sizeof_vla(int a) { + // FIXME: VLA modeling is not good enough to cover this case. + if (a == 5) { + char x[a]; + int y[sizeof(x)]; + y[4] = 4; // no-warning + y[5] = 5; // should be {{Out of bounds access}} + } +} + +void sizeof_vla_2(int a) { + // FIXME: VLA modeling is not good enough to cover this case. 
+ if (a == 5) { + char x[a]; + int y[sizeof(x) / sizeof(char)]; + y[4] = 4; // no-warning + y[5] = 5; // should be {{Out of bounds access}} + } +} + +void sizeof_vla_3(int a) { + // FIXME: VLA modeling is not good enough to cover this case. + if (a == 5) { + char x[a]; + int y[sizeof(*&*&*&x)]; + y[4] = 4; // no-warning + y[5] = 5; // should be {{Out of bounds access}} + } +} diff --git a/clang/test/Analysis/outofbound-notwork.c b/clang/test/Analysis/outofbound-notwork.c deleted file mode 100644 index 1318c07bbf2a8..0000000000000 --- a/clang/test/Analysis/outofbound-notwork.c +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,security.ArrayBound -verify %s -// XFAIL: * - -// Once we better handle modeling of sizes of VLAs, we can pull this back -// into outofbound.c. - -void sizeof_vla(int a) { - if (a == 5) { - char x[a]; - int y[sizeof(x)]; - y[4] = 4; // no-warning - y[5] = 5; // expected-warning{{Out of bounds access}} - } -} - -void sizeof_vla_2(int a) { - if (a == 5) { - char x[a]; - int y[sizeof(x) / sizeof(char)]; - y[4] = 4; // no-warning - y[5] = 5; // expected-warning{{Out of bounds access}} - } -} - -void sizeof_vla_3(int a) { - if (a == 5) { - char x[a]; - int y[sizeof(*&*&*&x)]; - y[4] = 4; // no-warning - y[5] = 5; // expected-warning{{Out of bounds access}} - } -} diff --git a/clang/test/Analysis/outofbound.c b/clang/test/Analysis/outofbound.c deleted file mode 100644 index d3d8ff2b2f0ed..0000000000000 --- a/clang/test/Analysis/outofbound.c +++ /dev/null @@ -1,130 +0,0 @@ -// RUN: %clang_analyze_cc1 -Wno-array-bounds -verify %s \ -// RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=unix \ -// RUN: -analyzer-checker=security.ArrayBound \ -// RUN: -analyzer-config unix.DynamicMemoryModeling:Optimistic=true - -typedef __typeof(sizeof(int)) size_t; -void *malloc(size_t); -void *calloc(size_t, size_t); - -char f1(void) { - char* s = "abcd"; - char c = s[4]; // no-warning - return s[5] + c; // 
expected-warning{{Out of bound access to memory after}} -} - -void f2(void) { - int *p = malloc(12); - p[3] = 4; // expected-warning{{Out of bound access to memory after}} -} - -struct three_words { - int c[3]; -}; - -struct seven_words { - int c[7]; -}; - -void f3(void) { - struct three_words a, *p; - p = &a; - p[0] = a; // no-warning - p[1] = a; // expected-warning{{Out of bound access to memory after}} -} - -void f4(void) { - struct seven_words c; - struct three_words a, *p = (struct three_words *)&c; - p[0] = a; // no-warning - p[1] = a; // no-warning - p[2] = a; // should warn - // FIXME: This is an overflow, but currently security.ArrayBound only checks - // that the _beginning_ of the accessed element is within bounds. -} - -void f5(void) { - char *p = calloc(2,2); - p[3] = '.'; // no-warning - p[4] = '!'; // expected-warning{{Out of bound access}} -} - -void f6(void) { - char a[2]; - int *b = (int*)a; - b[1] = 3; // expected-warning{{Out of bound access}} -} - -void f7(void) { - struct three_words a; - a.c[3] = 1; // expected-warning{{Out of bound access}} -} - -void vla(int a) { - if (a == 5) { - int x[a]; - x[4] = 4; // no-warning - x[5] = 5; // expected-warning{{Out of bound access}} - } -} - -void alloca_region(int a) { - if (a == 5) { - char *x = __builtin_alloca(a); - x[4] = 4; // no-warning - x[5] = 5; // expected-warning{{Out of bound access}} - } -} - -int symbolic_index(int a) { - int x[2] = {1, 2}; - if (a == 2) { - return x[a]; // expected-warning{{Out of bound access}} - } - return 0; -} - -int symbolic_index2(int a) { - int x[2] = {1, 2}; - if (a < 0) { - return x[a]; // expected-warning{{Out of bound access}} - } - return 0; -} - -int overflow_binary_search(double in) { - int eee = 16; - if (in < 1e-8 || in > 1e23) { - return 0; - } else { - static const double ins[] = {1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, - 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, - 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 
1e22}; - if (in < ins[eee]) { - eee -= 8; - } else { - eee += 8; - } - if (in < ins[eee]) { - eee -= 4; - } else { - eee += 4; - } - if (in < ins[eee]) { - eee -= 2; - } else { - eee += 2; - } - if (in < ins[eee]) { - eee -= 1; - } else { - eee += 1; - } - if (in < ins[eee]) { // expected-warning {{Out of bound access}} - eee -= 1; - } - } - return eee; -} diff --git a/clang/test/Analysis/region-store.cpp b/clang/test/Analysis/region-store.cpp index 9e80a2e688575..cb3313cbbb313 100644 --- a/clang/test/Analysis/region-store.cpp +++ b/clang/test/Analysis/region-store.cpp @@ -386,3 +386,32 @@ void tooManyFnArgumentsWhenInlining() { 10,11,12,13,14,15,16,17,18,19, }); } + +void gh129211_assertion() { + struct Clazz { + int b; + int : 0; + }; + + Clazz d[][5][5] = { + { + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}} + }, + { + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + }, + { + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}}, + } + }; // no-crash +} diff --git a/clang/test/CIR/CodeGen/basic.cpp b/clang/test/CIR/CodeGen/basic.cpp new file mode 100644 index 0000000000000..210afcd541159 --- /dev/null +++ b/clang/test/CIR/CodeGen/basic.cpp @@ -0,0 +1,27 @@ +// RUN: not %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - 2>&1 | FileCheck %s + +// This error is caused by the "const int i = 2" line in f2(). When +// initaliziers are implemented, the checks there should be updated +// and the "not" should be removed from the run line. 
+// CHECK: error: ClangIR code gen Not Yet Implemented: emitAutoVarInit + +int f1() { + int i; + return i; +} + +// CHECK: module +// CHECK: cir.func @f1() -> !cir.int +// CHECK: %[[I_PTR:.*]] = cir.alloca !cir.int, !cir.ptr>, ["i"] {alignment = 4 : i64} +// CHECK: %[[I:.*]] = cir.load %[[I_PTR]] : !cir.ptr>, !cir.int +// CHECK: cir.return %[[I]] : !cir.int + +int f2() { + const int i = 2; + return i; +} + +// CHECK: cir.func @f2() -> !cir.int +// CHECK: %[[I_PTR:.*]] = cir.alloca !cir.int, !cir.ptr>, ["i", const] {alignment = 4 : i64} +// CHECK: %[[I:.*]] = cir.load %[[I_PTR]] : !cir.ptr>, !cir.int +// CHECK: cir.return %[[I]] : !cir.int diff --git a/clang/test/CodeGen/AArch64/fp8-init-list.c b/clang/test/CodeGen/AArch64/fp8-init-list.c index 8b4b31a71c46a..872ee4f8a3d42 100644 --- a/clang/test/CodeGen/AArch64/fp8-init-list.c +++ b/clang/test/CodeGen/AArch64/fp8-init-list.c @@ -12,14 +12,14 @@ // CHECK-LABEL: define dso_local <8 x i8> @vector_init_test( // CHECK-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VECINIT14:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: ret <8 x i8> [[VECINIT14]] +// CHECK-NEXT: [[VECINIT7:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> [[VECINIT7]] // // CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z16vector_init_testu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[VECINIT14:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer -// CHECK-CXX-NEXT: ret <8 x i8> [[VECINIT14]] +// CHECK-CXX-NEXT: [[VECINIT7:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer +// CHECK-CXX-NEXT: ret <8 x i8> [[VECINIT7]] // mfloat8x8_t vector_init_test(__mfp8 x) { return (mfloat8x8_t) {x, x, x, x, x, x, x, x}; @@ -34,13 +34,15 @@ 
struct S s; // CHECK-LABEL: define dso_local void @f( // CHECK-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[X]], i64 0 +// CHECK-NEXT: store i8 [[TMP0]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z1fu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] +// CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[X]], i64 0 +// CHECK-CXX-NEXT: store i8 [[TMP0]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] // CHECK-CXX-NEXT: ret void // void f(__mfp8 x) { diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c index 2f3994df03784..0b355db4b2073 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c @@ -49,8 +49,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-LABEL: define dso_local @test_svdot_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -59,8 +59,8 @@ svfloat32_t 
test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-CXX-LABEL: define dso_local @_Z20test_svdot_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -91,8 +91,8 @@ svfloat16_t test_svdot_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-LABEL: define dso_local @test_svdot_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -101,8 +101,8 @@ svfloat16_t test_svdot_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-CXX-LABEL: define dso_local @_Z20test_svdot_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // 
CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c index 425e6a57ffe3c..0daeeec9e7dd7 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c @@ -49,8 +49,8 @@ svfloat16_t test_svmlalb_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-LABEL: define dso_local @test_svmlalb_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalb.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -59,8 +59,8 @@ svfloat16_t test_svmlalb_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-CXX-LABEL: define dso_local @_Z22test_svmlalb_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> 
[[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalb.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -91,8 +91,8 @@ svfloat16_t test_svmlalt_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-LABEL: define dso_local @test_svmlalt_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalt.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -101,8 +101,8 @@ svfloat16_t test_svmlalt_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-CXX-LABEL: define dso_local @_Z22test_svmlalt_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalt.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) 
@@ -169,8 +169,8 @@ svfloat32_t test_svmlallbb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlallbb_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -179,8 +179,8 @@ svfloat32_t test_svmlallbb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlallbb_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -211,8 +211,8 @@ svfloat32_t test_svmlallbt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlallbt_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: 
[[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -221,8 +221,8 @@ svfloat32_t test_svmlallbt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlallbt_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -253,8 +253,8 @@ svfloat32_t test_svmlalltb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlalltb_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.fp8.fmlalltb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -263,8 +263,8 @@ svfloat32_t test_svmlalltb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlalltb_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -295,8 +295,8 @@ svfloat32_t test_svmlalltt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlalltt_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -305,8 +305,8 @@ svfloat32_t test_svmlalltt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlalltt_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c index 276ef64736bc3..40dcd65f6c609 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c @@ -205,6 +205,21 @@ svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld1,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, [[PG:%.*]], zeroinitializer) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld1_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, [[PG:%.*]], zeroinitializer) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svld1_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld1,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -476,6 +491,29 @@ svfloat64_t test_svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum return SVE_ACLE_FUNC(svld1_vnum,_f64,,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 
@llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[TMP2]], i32 1, [[PG:%.*]], zeroinitializer) +// CHECK-NEXT: ret [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[TMP2]], i32 1, [[PG:%.*]], zeroinitializer) +// CPP-CHECK-NEXT: ret [[TMP3]] +// +svmfloat8_t test_svld1_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld1_vnum,_mf8,,)(pg, base, vnum); +} + #ifndef __ARM_FEATURE_SME // CHECK-LABEL: @test_svld1_gather_u32base_s32( diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c index 3097cb9cbcaab..abe1c87b6f2c3 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c @@ -206,6 +206,21 @@ svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld2,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld2_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld2_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr 
[[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svld2_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld2,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld2_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -410,3 +425,20 @@ svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svld2_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld2_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret { , } [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret { , } [[TMP1]] +// +svmfloat8x2_t test_svld2_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld2_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c index 2deb5a1d4930c..5ff7ad9de483b 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c @@ -205,6 +205,21 @@ svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld3,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld3_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , } [[TMP0]] +// +// CPP-CHECK-LABEL: 
@_Z14test_svld3_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP0]] +// +svmfloat8x3_t test_svld3_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld3,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld3_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -409,3 +424,20 @@ svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svld3_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld3_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret { , , } [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP1]] +// +svmfloat8x3_t test_svld3_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld3_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c index 30796a4f46a72..650fd5986be27 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c @@ -205,6 +205,21 @@ svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld4,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld4_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] 
= tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld4_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svld4_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld4,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld4_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -409,3 +424,20 @@ svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svld4_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld4_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret { , , , } [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] +// +svmfloat8x4_t test_svld4_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld4_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c index d343c124fe6a7..b96bf0cb23d12 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c @@ -206,6 +206,21 @@ svfloat64_t test_svldnt1_f64(svbool_t 
pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svldnt1,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svldnt1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svldnt1_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svldnt1_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svldnt1,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svldnt1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -410,3 +425,20 @@ svfloat64_t test_svldnt1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svldnt1_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svldnt1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svldnt1_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c index 29afdaf3eb0c7..21350007da86f 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c +++ 
b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c @@ -205,6 +205,21 @@ void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data) MODE_ATTR return SVE_ACLE_FUNC(svst1,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, [[PG:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst1_mf8u10__SVBool_tPu6__mfp8u13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, [[PG:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_mf8(svbool_t pg, mfloat8_t *base, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst1,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -476,6 +491,29 @@ void test_svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t return SVE_ACLE_FUNC(svst1_vnum,_f64,,)(pg, base, vnum, data); } +// CHECK-LABEL: @test_svst1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[TMP2]], i32 1, [[PG:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_mf8u10__SVBool_tPu6__mfp8lu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void 
@llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[TMP2]], i32 1, [[PG:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst1_vnum,_mf8,,)(pg, base, vnum, data); +} + #ifndef __ARM_FEATURE_SME // CHECK-LABEL: @test_svst1_scatter_u32base_s32( diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c index d1511b4c363d0..9e73e4464c6f9 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c @@ -293,6 +293,29 @@ void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data) MODE_ATTR return SVE_ACLE_FUNC(svst2,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst2_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst2_mf8u10__SVBool_tPu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2_mf8(svbool_t pg, mfloat8_t *base, svmfloat8x2_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst2,_mf8,,)(pg, base, data); +} + // 
CHECK-LABEL: @test_svst2_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 @@ -585,3 +608,28 @@ void test_svst2_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x2 { return SVE_ACLE_FUNC(svst2_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst2_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_mf8u10__SVBool_tPu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x2_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst2_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c index 4198a325f5fb6..b693b693b1ebb 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c @@ 
-337,6 +337,33 @@ void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data) MODE_ATTR return SVE_ACLE_FUNC(svst3,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst3_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst3_mf8u10__SVBool_tPu6__mfp813svmfloat8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3_mf8(svbool_t pg, mfloat8_t *base, svmfloat8x3_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst3,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst3_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 @@ -673,3 +700,32 @@ void test_svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3 { return SVE_ACLE_FUNC(svst3_vnum,_f64,,)(pg, base, vnum, data); } + +// 
CHECK-LABEL: @test_svst3_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_mf8u10__SVBool_tPu6__mfp8l13svmfloat8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x3_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst3_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c index 160a21d93e416..f8c3b60682573 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c +++ 
b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c @@ -381,6 +381,37 @@ void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data) MODE_ATTR return SVE_ACLE_FUNC(svst4,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst4_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst4_mf8u10__SVBool_tPu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4_mf8(svbool_t pg, 
mfloat8_t *base, svmfloat8x4_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst4,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst4_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 @@ -761,3 +792,36 @@ void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4 { return SVE_ACLE_FUNC(svst4_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst4_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[TMP8]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_mf8u10__SVBool_tPu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , 
, , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[TMP8]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x4_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst4_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c index 5e0869557c8d7..f739ea5dca641 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c @@ -206,6 +206,21 @@ void test_svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data) MODE_ATTR return SVE_ACLE_FUNC(svstnt1,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svstnt1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z16test_svstnt1_mf8u10__SVBool_tPu6__mfp8u13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_mf8(svbool_t pg, mfloat8_t *base, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svstnt1,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svstnt1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -410,3 +425,20 @@ void test_svstnt1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64 { return SVE_ACLE_FUNC(svstnt1_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svstnt1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr 
[[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_mf8u10__SVBool_tPu6__mfp8lu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svstnt1_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c index 93cb653032df7..ee5c2c592c61d 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c @@ -309,6 +309,21 @@ svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svld1,_f64,_x2,)(pn, base); } +// CHECK-LABEL: @test_svld1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_mf8_x2u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svld1_mf8_x2(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svld1,_mf8,_x2,)(pn, base); +} + // CHECK-LABEL: @test_svld1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) 
@@ -354,6 +369,20 @@ svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svld1,_f64,_x4,)(pn, base); } +// CHECK-LABEL: @test_svld1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_mf8_x4u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svld1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svld1,_mf8,_x4,)(pn, base); +} // == VNUM variants == @@ -795,6 +824,29 @@ svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_ return SVE_ACLE_FUNC(svld1_vnum,_f64,_x2,)(pn, base, vnum); } +// CHECK-LABEL: @test_svld1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_mf8_x2u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") 
[[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , } [[TMP3]] +// +svmfloat8x2_t test_svld1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svld1_vnum,_mf8,_x2,)(pn, base, vnum); +} + // CHECK-LABEL: @test_svld1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -863,3 +915,26 @@ svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_ { return SVE_ACLE_FUNC(svld1_vnum,_f64,_x4,)(pn, base, vnum); } + +// CHECK-LABEL: @test_svld1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_mf8_x4u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] +// +svmfloat8x4_t test_svld1_vnum_mf8_x4(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svld1_vnum,_mf8,_x4,)(pn, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c index 8254c6aec5dc1..692af131e69de 100644 --- 
a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c @@ -307,6 +307,21 @@ svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svldnt1,_f64,_x2,)(pn, base); } +// CHECK-LABEL: @test_svldnt1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_mf8_x2u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svldnt1_mf8_x2(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svldnt1,_mf8,_x2,)(pn, base); +} + // CHECK-LABEL: @test_svldnt1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) @@ -352,6 +367,20 @@ svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svldnt1,_f64,_x4,)(pn, base); } +// CHECK-LABEL: @test_svldnt1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_mf8_x4u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svldnt1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svldnt1,_mf8,_x4,)(pn, base); +} // == VNUM 
variants == @@ -793,6 +822,29 @@ svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int6 return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x2,)(pn, base, vnum); } +// CHECK-LABEL: @test_svldnt1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_mf8_x2u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , } [[TMP3]] +// +svmfloat8x2_t test_svldnt1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,_x2,)(pn, base, vnum); +} + // CHECK-LABEL: @test_svldnt1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -861,3 +913,26 @@ svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int6 { return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x4,)(pn, base, vnum); } + +// CHECK-LABEL: @test_svldnt1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = 
getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_mf8_x4u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] +// +svmfloat8x4_t test_svldnt1_vnum_mf8_x4(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,_x4,)(pn, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c index 233c9b29e707a..7adb3d4940e7f 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c @@ -214,6 +214,21 @@ svfloat64x2_t test_svld2q_f64(svbool_t pg, const float64_t *base) return SVE_ACLE_FUNC(svld2q,,_f64,)(pg, base); } +// CHECK-LABEL: @test_svld2q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svld2q_mf8(svbool_t pg, const mfloat8_t *base) +{ + return 
SVE_ACLE_FUNC(svld2q,,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld2q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -509,6 +524,29 @@ svfloat64x2_t test_svld2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v return SVE_ACLE_FUNC(svld2q_vnum,,_f64,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld2q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , } [[TMP3]] +// +svmfloat8x2_t test_svld2q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_mf8,)(pg, base, vnum); +} + // CHECK-LABEL: @test_svld3q_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) @@ -709,6 +747,21 @@ svfloat64x3_t test_svld3q_f64(svbool_t pg, const float64_t *base) return SVE_ACLE_FUNC(svld3q,,_f64,)(pg, base); } +// CHECK-LABEL: @test_svld3q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// 
CHECK-NEXT: ret { , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP0]] +// +svmfloat8x3_t test_svld3q_mf8(svbool_t pg, const mfloat8_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld3q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1005,6 +1058,29 @@ svfloat64x3_t test_svld3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v return SVE_ACLE_FUNC(svld3q_vnum,,_f64,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld3q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] +// +svmfloat8x3_t test_svld3q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_mf8,)(pg, base, vnum); +} + // CHECK-LABEL: @test_svld4q_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) @@ -1190,6 +1266,21 @@ svfloat64x4_t test_svld4q_f64(svbool_t pg, const float64_t *base) return SVE_ACLE_FUNC(svld4q,,_f64,)(pg, base); } +// CHECK-LABEL: @test_svld4q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svld4q_mf8(svbool_t pg, const mfloat8_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld4q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1485,6 +1576,28 @@ svfloat64x4_t test_svld4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v return SVE_ACLE_FUNC(svld4q_vnum,,_f64,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld4q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] +// +svmfloat8x4_t test_svld4q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_mf8,)(pg, base, vnum); +} // Gather for 128 bits // vector base + scalar offset @@ -1692,6 +1805,23 @@ svbfloat16_t test_svld1q_gather_u64base_offset_bf16(svbool_t pg, svuint64_t base return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_bf16,)(pg, base, offset); } +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_mf8u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svld1q_gather_u64base_offset_mf8(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_mf8,)(pg, base, offset); +} + // Vector base and no offset // CHECK-LABEL: @test_svld1q_gather_u64base_u64( // CHECK-NEXT: entry: @@ -1897,6 +2027,23 @@ svbfloat16_t test_svld1q_gather_u64base_bf16(svbool_t pg, svuint64_t base) return SVE_ACLE_FUNC(svld1q_gather,_u64base,_bf16,)(pg, base); } +// CHECK-LABEL: @test_svld1q_gather_u64base_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_mf8u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svld1q_gather_u64base_mf8(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld1q_gather_u64index_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) @@ -2428,3 +2575,19 @@ svfloat32_t test_svdl1q_gather_u64offset_f32(svbool_t pg, const float32_t *base, svfloat64_t test_svdl1q_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t off) { return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_f64)(pg, base, off); } + +// CHECK-LABEL: @test_svld1q_gather_u64offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8( [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_mf8u10__SVBool_tPKu6__mfp8u12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8( [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svld1q_gather_u64offset_mf8(svbool_t pg, mfloat8_t const *base, svuint64_t off) { + return 
SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_mf8)(pg, base, off); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c index 9db3e5e98975a..e71e68114a5af 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c @@ -306,6 +306,21 @@ void test_svst1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR return SVE_ACLE_FUNC(svst1,_f64_x2,,)(pn, base, v); } +// CHECK-LABEL: @test_svst1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_mf8_x2u11__SVCount_tPu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_mf8_x2(svcount_t pn, mfloat8_t *base, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1,_mf8_x2,,)(pn, base, v); +} + // CHECK-LABEL: @test_svst1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) @@ -351,6 +366,21 @@ void test_svst1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR return SVE_ACLE_FUNC(svst1,_f64_x4,,)(pn, base, v); } +// CHECK-LABEL: @test_svst1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z17test_svst1_mf8_x4u11__SVCount_tPu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_mf8_x4(svcount_t pn, mfloat8_t *base, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1,_mf8_x4,,)(pn, base, v); +} + // == VNUM variants == @@ -798,6 +828,29 @@ void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svflo return SVE_ACLE_FUNC(svst1_vnum,_f64_x2,,)(pn, base, vnum, v); } +// CHECK-LABEL: @test_svst1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_mf8_x2u11__SVCount_tPu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_mf8_x2(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1_vnum,_mf8_x2,,)(pn, base, vnum, v); +} + // CHECK-LABEL: @test_svst1_vnum_f16_x4( // CHECK-NEXT: entry: // 
CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 @@ -872,3 +925,26 @@ void test_svst1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svflo { return SVE_ACLE_FUNC(svst1_vnum,_f64_x4,,)(pn, base, vnum, v); } + +// CHECK-LABEL: @test_svst1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_mf8_x4u11__SVCount_tPu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_mf8_x4(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1_vnum,_mf8_x4,,)(pn, base, vnum, v); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c index ed1959327a611..1544260377a20 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c @@ -325,6 +325,21 @@ void test_svstnt1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) 
ATTR return SVE_ACLE_FUNC(svstnt1,_f64_x2,,)(pn, base, v); } +// CHECK-LABEL: @test_svstnt1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_mf8_x2u11__SVCount_tPu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_mf8_x2(svcount_t pn, mfloat8_t *base, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1,_mf8_x2,,)(pn, base, v); +} + // CHECK-LABEL: @test_svstnt1_f16_x4( // CHECK-NEXT: entry: @@ -373,6 +388,21 @@ void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR return SVE_ACLE_FUNC(svstnt1,_f64_x4,,)(pn, base, v); } +// CHECK-LABEL: @test_svstnt1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_mf8_x4u11__SVCount_tPu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_mf8_x4(svcount_t pn, mfloat8_t *base, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1,_mf8_x4,,)(pn, base, v); +} + // == VNUM variants == @@ -837,6 +867,28 @@ void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svf return SVE_ACLE_FUNC(svstnt1_vnum,_f64_x2,,)(pn, base, vnum, v); } +// CHECK-LABEL: 
@test_svstnt1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_mf8_x2u11__SVCount_tPu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_mf8_x2(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1_vnum,_mf8_x2,,)(pn, base, vnum, v); +} // CHECK-LABEL: @test_svstnt1_vnum_f16_x4( // CHECK-NEXT: entry: @@ -914,3 +966,26 @@ void test_svstnt1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svf { return SVE_ACLE_FUNC(svstnt1_vnum,_f64_x4,,)(pn, base, vnum, v); } + +// CHECK-LABEL: @test_svstnt1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], 
target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_mf8_x4u11__SVCount_tPu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_mf8_x4(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1_vnum,_mf8_x4,,)(pn, base, vnum, v); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c index b91780304dacb..517d5f244a46f 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c @@ -213,6 +213,21 @@ void test_svst2q_f64(svbool_t pg, const float64_t *base, svfloat64x2_t zt) SVE_ACLE_FUNC(svst2q,,_f64,)(pg, base, zt); } +// CHECK-LABEL: @test_svst2q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_mf8,)(pg, base, zt); +} + // CHECK-LABEL: 
@test_svst2q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -509,6 +524,29 @@ void test_svst2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl SVE_ACLE_FUNC(svst2q_vnum,,_f64,)(pg, base, vnum, zt); } +// CHECK-LABEL: @test_svst2q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_mf8,)(pg, base, vnum, zt); +} + // // ST3Q // CHECK-LABEL: @test_svst3q_u8( @@ -710,6 +748,21 @@ void test_svst3q_f64(svbool_t pg, const float64_t *base, svfloat64x3_t zt) SVE_ACLE_FUNC(svst3q,,_f64,)(pg, base, zt); } +// CHECK-LABEL: @test_svst3q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x3_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_mf8,)(pg, base, zt); +} + // CHECK-LABEL: @test_svst3q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1006,6 +1059,29 @@ void test_svst3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl SVE_ACLE_FUNC(svst3q_vnum,,_f64,)(pg, base, vnum, zt); } +// CHECK-LABEL: @test_svst3q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_mf8,)(pg, base, vnum, zt); +} + // // ST4Q // CHECK-LABEL: @test_svst4q_u8( @@ -1207,6 +1283,21 @@ void test_svst4q_f64(svbool_t pg, const float64_t 
*base, svfloat64x4_t zt) SVE_ACLE_FUNC(svst4q,,_f64,)(pg, base, zt); } +// CHECK-LABEL: @test_svst4q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_mf8,)(pg, base, zt); +} + // CHECK-LABEL: @test_svst4q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1503,6 +1594,29 @@ void test_svst4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl SVE_ACLE_FUNC(svst4q_vnum,,_f64,)(pg, base, vnum, zt); } +// CHECK-LABEL: @test_svst4q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_mf8,)(pg, base, vnum, zt); +} + // Scatter for 128 bits // vector base + scalar offset // CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u64( @@ -1710,6 +1824,23 @@ void test_svst1q_scatter_u64base_offset_bf16(svbool_t pg, svuint64_t base, int64 SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _bf16)(pg, base, offset, data); } +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_mf8u10__SVBool_tu12__SVUint64_tlu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_mf8(svbool_t pg, svuint64_t base, int64_t offset, svmfloat8_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _mf8)(pg, base, offset, data); +} + // Vector Base and no Offset // CHECK-LABEL: @test_svst1q_scatter_u64base_u64( // CHECK-NEXT: entry: @@ -1915,6 +2046,23 @@ void test_svst1q_scatter_u64base_bf16(svbool_t pg, svuint64_t base, svbfloat16_t SVE_ACLE_FUNC(svst1q_scatter, _u64base,,_bf16)(pg, base, data); } +// CHECK-LABEL: @test_svst1q_scatter_u64base_mf8( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_mf8u10__SVBool_tu12__SVUint64_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_mf8(svbool_t pg, svuint64_t base, svmfloat8_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,,_mf8)(pg, base, data); +} + // CHECK-LABEL: @test_svst1q_scatter_u64index_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) @@ -2798,3 +2946,35 @@ void test_svst1q_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t void test_svst1q_scatter_s64offset_f64(svbool_t pg, float64_t *base, svint64_t off, svfloat64_t data) { SVE_ACLE_FUNC(svst1q_scatter_,s64,offset,_f64)(pg, base, off, data); } + +// CHECK-LABEL: @test_svst1q_scatter_u64offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_mf8u10__SVBool_tPu6__mfp8u12__SVUint64_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], 
[[OFF:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64offset_mf8(svbool_t pg, mfloat8_t *base, svuint64_t off, svmfloat8_t data) { + SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_mf8)(pg, base, off, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_s64offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_s64offset_mf8u10__SVBool_tPu6__mfp8u11__SVInt64_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_s64offset_mf8(svbool_t pg, mfloat8_t *base, svint64_t off, svmfloat8_t data) { + SVE_ACLE_FUNC(svst1q_scatter_,s64,offset,_mf8)(pg, base, off, data); +} diff --git a/clang/test/CodeGen/arm-mfp8.c b/clang/test/CodeGen/arm-mfp8.c index 9385b537f18b3..d9e7b5d4707d8 100644 --- a/clang/test/CodeGen/arm-mfp8.c +++ b/clang/test/CodeGen/arm-mfp8.c @@ -38,22 +38,34 @@ mfloat8x8_t test_ret_mfloat8x8_t(mfloat8x8_t v) { // CHECK-C-LABEL: define dso_local <1 x i8> @func1n( // CHECK-C-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] { // CHECK-C-NEXT: [[ENTRY:.*:]] -// CHECK-C-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1 -// CHECK-C-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-C-NEXT: store <1 x i8> [[MFP8]], ptr [[ARRAYIDX]], align 1 -// CHECK-C-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-C-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1 -// CHECK-C-NEXT: 
ret <1 x i8> [[TMP0]] +// CHECK-C-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 +// CHECK-C-NEXT: [[MFP8_ADDR:%.*]] = alloca i8, align 1 +// CHECK-C-NEXT: [[F1N:%.*]] = alloca [10 x i8], align 1 +// CHECK-C-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1 +// CHECK-C-NEXT: [[TMP0:%.*]] = load i8, ptr [[MFP8_ADDR]], align 1 +// CHECK-C-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-C-NEXT: store i8 [[TMP0]], ptr [[ARRAYIDX]], align 1 +// CHECK-C-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-C-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +// CHECK-C-NEXT: store i8 [[TMP1]], ptr [[RETVAL]], align 1 +// CHECK-C-NEXT: [[TMP2:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 +// CHECK-C-NEXT: ret <1 x i8> [[TMP2]] // // CHECK-CXX-LABEL: define dso_local <1 x i8> @_Z6func1nu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1 -// CHECK-CXX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-CXX-NEXT: store <1 x i8> [[MFP8]], ptr [[ARRAYIDX]], align 1 -// CHECK-CXX-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1 -// CHECK-CXX-NEXT: ret <1 x i8> [[TMP0]] +// CHECK-CXX-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 +// CHECK-CXX-NEXT: [[MFP8_ADDR:%.*]] = alloca i8, align 1 +// CHECK-CXX-NEXT: [[F1N:%.*]] = alloca [10 x i8], align 1 +// CHECK-CXX-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1 +// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[MFP8_ADDR]], align 1 +// CHECK-CXX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-CXX-NEXT: store i8 [[TMP0]], ptr [[ARRAYIDX]], align 1 +// CHECK-CXX-NEXT: [[ARRAYIDX1:%.*]] = 
getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-CXX-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +// CHECK-CXX-NEXT: store i8 [[TMP1]], ptr [[RETVAL]], align 1 +// CHECK-CXX-NEXT: [[TMP2:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 +// CHECK-CXX-NEXT: ret <1 x i8> [[TMP2]] // __mfp8 func1n(__mfp8 mfp8) { __mfp8 f1n[10]; @@ -86,14 +98,18 @@ mfloat8_t test_extract_element(mfloat8x16_t x, int i) { // CHECK-C-LABEL: define dso_local <16 x i8> @test_insert_element( // CHECK-C-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]], <1 x i8> [[V:%.*]]) #[[ATTR0]] { // CHECK-C-NEXT: [[ENTRY:.*:]] -// CHECK-C-NEXT: [[TMP0:%.*]] = bitcast <1 x i8> [[V]] to i8 +// CHECK-C-NEXT: [[V_ADDR:%.*]] = alloca i8, align 1 +// CHECK-C-NEXT: store <1 x i8> [[V]], ptr [[V_ADDR]], align 1 +// CHECK-C-NEXT: [[TMP0:%.*]] = load i8, ptr [[V_ADDR]], align 1 // CHECK-C-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[X]], i8 [[TMP0]], i32 [[I]] // CHECK-C-NEXT: ret <16 x i8> [[VECINS]] // // CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z19test_insert_element14__Mfloat8x16_tiu6__mfp8( // CHECK-CXX-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]], <1 x i8> [[V:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <1 x i8> [[V]] to i8 +// CHECK-CXX-NEXT: [[V_ADDR:%.*]] = alloca i8, align 1 +// CHECK-CXX-NEXT: store <1 x i8> [[V]], ptr [[V_ADDR]], align 1 +// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[V_ADDR]], align 1 // CHECK-CXX-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[X]], i8 [[TMP0]], i32 [[I]] // CHECK-CXX-NEXT: ret <16 x i8> [[VECINS]] // diff --git a/clang/test/CodeGenCXX/wasm-eh.cpp b/clang/test/CodeGenCXX/wasm-eh.cpp index 9dc15633bfed9..e8797794e7c1e 100644 --- a/clang/test/CodeGenCXX/wasm-eh.cpp +++ b/clang/test/CodeGenCXX/wasm-eh.cpp @@ -6,6 +6,9 @@ // RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -fms-extensions -fexceptions -fcxx-exceptions -mllvm -wasm-enable-eh -exception-model=wasm 
-target-feature +exception-handling -emit-llvm -o - -std=c++11 | FileCheck %s // RUN: %clang_cc1 %s -triple wasm64-unknown-unknown -fms-extensions -fexceptions -fcxx-exceptions -mllvm -wasm-enable-eh -exception-model=wasm -target-feature +exception-handling -emit-llvm -o - -std=c++11 | FileCheck %s +// Test code generation for Wasm EH using WebAssembly EH proposal. +// (https://github.com/WebAssembly/exception-handling/blob/main/proposals/exception-handling/Exceptions.md) + void may_throw(); void dont_throw() noexcept; @@ -381,6 +384,15 @@ void test8() { // CHECK: unreachable +void noexcept_throw() noexcept { + throw 3; +} + +// CATCH-LABEL: define void @_Z14noexcept_throwv() +// CHECK: %{{.*}} = cleanuppad within none [] +// CHECK-NEXT: call void @_ZSt9terminatev() + + // RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -fms-extensions -fexceptions -fcxx-exceptions -exception-model=wasm -target-feature +exception-handling -emit-llvm -o - -std=c++11 2>&1 | FileCheck %s --check-prefix=WARNING-DEFAULT // RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -fms-extensions -fexceptions -fcxx-exceptions -exception-model=wasm -target-feature +exception-handling -Wwasm-exception-spec -emit-llvm -o - -std=c++11 2>&1 | FileCheck %s --check-prefix=WARNING-ON // RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -fms-extensions -fexceptions -fcxx-exceptions -exception-model=wasm -target-feature +exception-handling -Wno-wasm-exception-spec -emit-llvm -o - -std=c++11 2>&1 | FileCheck %s --check-prefix=WARNING-OFF diff --git a/clang/test/CodeGenCXX/wasm-em-eh.cpp b/clang/test/CodeGenCXX/wasm-em-eh.cpp new file mode 100644 index 0000000000000..fc96fa96b5140 --- /dev/null +++ b/clang/test/CodeGenCXX/wasm-em-eh.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -triple wasm32-unknown-emscripten -fexceptions -fcxx-exceptions -emit-llvm -o - -std=c++11 2>&1 | FileCheck %s + +// Test code generation for Wasm's Emscripten (JavaScript-style) EH. 
+ +void noexcept_throw() noexcept { + throw 3; +} + +// CATCH-LABEL: define void @_Z14noexcept_throwv() +// CHECK: %[[LPAD:.*]] = landingpad { ptr, i32 } +// CHECK-NEXT: catch ptr null +// CHECK-NEXT: %[[EXN:.*]] = extractvalue { ptr, i32 } %[[LPAD]], 0 +// CHECK-NEXT: call void @__clang_call_terminate(ptr %[[EXN]]) diff --git a/clang/test/CodeGenCoroutines/coro-params.cpp b/clang/test/CodeGenCoroutines/coro-params.cpp index b318f2f52ac09..719726cca29c5 100644 --- a/clang/test/CodeGenCoroutines/coro-params.cpp +++ b/clang/test/CodeGenCoroutines/coro-params.cpp @@ -3,6 +3,7 @@ // Vefifies that parameter copies are used in the body of the coroutine // Verifies that parameter copies are used to construct the promise type, if that type has a matching constructor // RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -disable-llvm-passes -fexceptions | FileCheck %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-pc-win32 -emit-llvm -o - %s -disable-llvm-passes -fexceptions | FileCheck %s --check-prefix=MSABI namespace std { template struct coroutine_traits; @@ -59,13 +60,22 @@ struct MoveAndCopy { ~MoveAndCopy(); }; -void consume(int,int,int) noexcept; +struct [[clang::trivial_abi]] TrivialABI { + int val; + TrivialABI(TrivialABI&&) noexcept; + ~TrivialABI(); +}; + +void consume(int,int,int,int) noexcept; // TODO: Add support for CopyOnly params -// CHECK: define{{.*}} void @_Z1fi8MoveOnly11MoveAndCopy(i32 noundef %val, ptr noundef %[[MoParam:.+]], ptr noundef %[[McParam:.+]]) #0 personality ptr @__gxx_personality_v0 -void f(int val, MoveOnly moParam, MoveAndCopy mcParam) { +// CHECK: define{{.*}} void @_Z1fi8MoveOnly11MoveAndCopy10TrivialABI(i32 noundef %val, ptr noundef %[[MoParam:.+]], ptr noundef %[[McParam:.+]], i32 %[[TrivialParam:.+]]) #0 personality ptr @__gxx_personality_v0 +void f(int val, MoveOnly moParam, MoveAndCopy mcParam, TrivialABI trivialParam) { + // CHECK: %[[TrivialAlloca:.+]] = alloca %struct.TrivialABI, + // CHECK-SAME: 
!coro.outside.frame // CHECK: %[[MoCopy:.+]] = alloca %struct.MoveOnly, // CHECK: %[[McCopy:.+]] = alloca %struct.MoveAndCopy, + // CHECK: %[[TrivialCopy:.+]] = alloca %struct.TrivialABI, // CHECK: store i32 %val, ptr %[[ValAddr:.+]] // CHECK: call ptr @llvm.coro.begin( @@ -73,25 +83,31 @@ void f(int val, MoveOnly moParam, MoveAndCopy mcParam) { // CHECK-NEXT: call void @llvm.lifetime.start.p0( // CHECK-NEXT: call void @_ZN11MoveAndCopyC1EOS_(ptr {{[^,]*}} %[[McCopy]], ptr noundef nonnull align 4 dereferenceable(4) %[[McParam]]) # // CHECK-NEXT: call void @llvm.lifetime.start.p0( - // CHECK-NEXT: invoke void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeC1Ev( + // CHECK-NEXT: call void @_ZN10TrivialABIC1EOS_(ptr {{[^,]*}} %[[TrivialCopy]], ptr {{[^,]*}} %[[TrivialAlloca]]) + // CHECK-NEXT: call void @llvm.lifetime.start.p0( + // CHECK-NEXT: invoke void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopy10TrivialABIEE12promise_typeC1Ev( // CHECK: call void @_ZN14suspend_always12await_resumeEv( // CHECK: %[[IntParam:.+]] = load i32, ptr %{{.*}} // CHECK: %[[MoGep:.+]] = getelementptr inbounds nuw %struct.MoveOnly, ptr %[[MoCopy]], i32 0, i32 0 // CHECK: %[[MoVal:.+]] = load i32, ptr %[[MoGep]] - // CHECK: %[[McGep:.+]] = getelementptr inbounds nuw %struct.MoveAndCopy, ptr %[[McCopy]], i32 0, i32 0 + // CHECK: %[[McGep:.+]] = getelementptr inbounds nuw %struct.MoveAndCopy, ptr %[[McCopy]], i32 0, i32 0 // CHECK: %[[McVal:.+]] = load i32, ptr %[[McGep]] - // CHECK: call void @_Z7consumeiii(i32 noundef %[[IntParam]], i32 noundef %[[MoVal]], i32 noundef %[[McVal]]) + // CHECK: %[[TrivialGep:.+]] = getelementptr inbounds nuw %struct.TrivialABI, ptr %[[TrivialCopy]], i32 0, i32 0 + // CHECK: %[[TrivialVal:.+]] = load i32, ptr %[[TrivialGep]] + // CHECK: call void @_Z7consumeiiii(i32 noundef %[[IntParam]], i32 noundef %[[MoVal]], i32 noundef %[[McVal]], i32 noundef %[[TrivialVal]]) - consume(val, moParam.val, mcParam.val); + consume(val, moParam.val, 
mcParam.val, trivialParam.val); co_return; // Skip to final suspend: - // CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_type13final_suspendEv( + // CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopy10TrivialABIEE12promise_type13final_suspendEv( // CHECK: call void @_ZN14suspend_always12await_resumeEv( // Destroy promise, then parameter copies: - // CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeD1Ev(ptr {{[^,]*}} %__promise) + // CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopy10TrivialABIEE12promise_typeD1Ev(ptr {{[^,]*}} %__promise) + // CHECK-NEXT: call void @llvm.lifetime.end.p0( + // CHECK-NEXT: call void @_ZN10TrivialABID1Ev(ptr {{[^,]*}} %[[TrivialCopy]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0( // CHECK-NEXT: call void @_ZN11MoveAndCopyD1Ev(ptr {{[^,]*}} %[[McCopy]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0( @@ -99,6 +115,10 @@ void f(int val, MoveOnly moParam, MoveAndCopy mcParam) { // CHECK-NEXT: call void @llvm.lifetime.end.p0( // CHECK-NEXT: call void @llvm.lifetime.end.p0( // CHECK-NEXT: call ptr @llvm.coro.free( + + // The original trivial_abi parameter is destroyed when returning from the ramp. 
+ // CHECK: call i1 @llvm.coro.end + // CHECK: call void @_ZN10TrivialABID1Ev(ptr {{[^,]*}} %[[TrivialAlloca]]) } // CHECK-LABEL: void @_Z16dependent_paramsI1A1BEvT_T0_S3_(ptr noundef %x, ptr noundef %0, ptr noundef %y) @@ -190,3 +210,38 @@ method some_class::good_coroutine_calls_custom_constructor(float) { // CHECK: invoke void @_ZNSt16coroutine_traitsIJ6methodR10some_classfEE12promise_typeC1ES2_f(ptr {{[^,]*}} %__promise, ptr noundef nonnull align 1 dereferenceable(1) %{{.+}}, float co_return; } + + +struct MSParm { + int val; + ~MSParm(); +}; + +void consume(int) noexcept; + +// Similarly to the [[clang::trivial_abi]] parameters, with the MSVC ABI +// parameters are also destroyed by the callee, and on x86-64 such parameters +// may get passed in registers. In that case it's again important that the +// parameter's local alloca does not become part of the coro frame since that +// may be destroyed before the destructor call. +void msabi(MSParm p) { + // MSABI: define{{.*}} void @"?msabi@@YAXUMSParm@@@Z"(i32 %[[Param:.+]]) + + // The parameter's local alloca is marked not part of the frame. + // MSABI: %[[ParamAlloca:.+]] = alloca %struct.MSParm + // MSABI-SAME: !coro.outside.frame + + // MSABI: %[[ParamCopy:.+]] = alloca %struct.MSParm + + consume(p.val); + // The parameter's copy is used by the coroutine. + // MSABI: %[[ValPtr:.+]] = getelementptr inbounds nuw %struct.MSParm, ptr %[[ParamCopy]], i32 0, i32 0 + // MSABI: %[[Val:.+]] = load i32, ptr %[[ValPtr]] + // MSABI: call void @"?consume@@YAXH@Z"(i32{{.*}} %[[Val]]) + + co_return; + + // The local alloca is used for the destructor call at the end of the ramp. 
+ // MSABI: call i1 @llvm.coro.end + // MSABI: call void @"??1MSParm@@QEAA@XZ"(ptr{{.*}} %[[ParamAlloca]]) +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl index 56495c85bf1fd..9927bb334c486 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl @@ -15,121 +15,121 @@ typedef short v16s __attribute__((ext_vector_type(16))); // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_f16_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i16(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i32(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf16_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i16(<8 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i32(<8 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], 
align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f16_16x16x32_f16_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i16(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x half> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i32(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x half> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, short index) +void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, int index) { *out = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_bf16_16x16x32_bf16_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i16(<8 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i32(<8 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, short index) +void 
test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, int index) { *out = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu8_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu4_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, int 
index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x64_iu4_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i16(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(a, b, c, 
index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32( // CHECK-GFX1200-NEXT: entry: -// 
CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(a, b, c, index); } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl index 89b26edb2f02b..eaa6b14d2a792 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl @@ -14,121 +14,121 @@ typedef short v8s __attribute__((ext_vector_type(8))); // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_f16_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i16(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i32(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_f16_w64(global v4f* out, v4h a, v8h b, v4f c, short index) +void 
test_amdgcn_swmmac_f32_16x16x32_f16_w64(global v4f* out, v4h a, v8h b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf16_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i16(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i32(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf16_w64(global v4f* out, v4s a, v8s b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf16_w64(global v4f* out, v4s a, v8s b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f16_16x16x32_f16_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i16(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x half> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i32(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x half> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f16_16x16x32_f16_w64(global v4h* out, v4h a, v8h b, v4h c, short index) +void test_amdgcn_swmmac_f16_16x16x32_f16_w64(global v4h* out, v4h a, v8h b, v4h c, int index) { *out = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64(a, b, c, index); } // 
CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_bf16_16x16x32_bf16_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i16(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i32(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(global v4s* out, v4s a, v8s b, v4s c, short index) +void test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(global v4s* out, v4s a, v8s b, v4s c, int index) { *out = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu8_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i16(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x32_iu8_w64(global v4i* out, int a, v2i b, v4i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu8_w64(global v4i* out, int a, v2i b, v4i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu4_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] 
= tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i16(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x32_iu4_w64(global v4i* out, int a, int b, v4i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu4_w64(global v4i* out, int a, int b, v4i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x64_iu4_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i16(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x64_iu4_w64(global v4i* out, int a, v2i b, v4i c, short index) +void test_amdgcn_swmmac_i32_16x16x64_iu4_w64(global v4i* out, int a, v2i b, v4i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i16(i32 
[[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(global v4f* out, int a, v2i b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(global v4f* out, int a, v2i b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i16(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(global v4f* out, int a, v2i b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(global v4f* out, int a, v2i b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i16(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> 
@llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(global v4f* out, int a, v2i b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(global v4f* out, int a, v2i b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i16(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(global v4f* out, int a, v2i b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(global v4f* out, int a, v2i b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(a, b, c, index); } diff --git a/clang/test/Driver/module-fgen-reduced-bmi.cppm b/clang/test/Driver/module-fgen-reduced-bmi.cppm index 7329c12941d73..9bdd4c9f6682f 100644 --- a/clang/test/Driver/module-fgen-reduced-bmi.cppm +++ b/clang/test/Driver/module-fgen-reduced-bmi.cppm @@ -48,6 +48,14 @@ // RUN: %clang -std=c++20 Hello.cppm --precompile -fmodules-reduced-bmi \ // RUN: -o Hello.full.pcm -### 2>&1 | FileCheck Hello.cppm \ // RUN: --check-prefix=CHECK-EMIT-MODULE-INTERFACE + +// RUN: %clang -std=c++20 Hello.cppm --precompile 
-fmodules-reduced-bmi \ +// RUN: -### 2>&1 | FileCheck Hello.cppm \ +// RUN: --check-prefix=CHECK-OVERRIDE-WARN + +// RUN: %clang -std=c++20 Hello.cppm --precompile -fmodules-reduced-bmi \ +// RUN: -o Hello.pcm -### 2>&1 | FileCheck Hello.cppm \ +// RUN: --check-prefix=CHECK-OVERRIDE-WARN // // RUN: %clang -std=c++20 Hello.cc -fmodules-reduced-bmi -Wall -Werror \ // RUN: -c -o Hello.o -### 2>&1 | FileCheck Hello.cc @@ -74,6 +82,8 @@ export module Hello; // flag. // CHECK-EMIT-MODULE-INTERFACE: -emit-module-interface +// CHECK-OVERRIDE-WARN: warning: the implicit output of reduced BMI may be overrided by the output file specified by '--precompile'. {{.*}}-Wreduced-bmi-output-overrided + // NO_WARN-NOT: warning //--- Hello.cc diff --git a/clang/test/Modules/pr28744.cpp b/clang/test/Modules/pr28744.cpp new file mode 100644 index 0000000000000..2089872a2a75a --- /dev/null +++ b/clang/test/Modules/pr28744.cpp @@ -0,0 +1,17 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -std=c++11 -I%S/Inputs/PR28794 -verify %s +// RUN: %clang_cc1 -std=c++11 -fmodules -fmodule-map-file=%S/Inputs/PR28794/module.modulemap -fmodules-cache-path=%t -I%S/Inputs/PR28794/ -verify %s + +#include "Subdir/Empty.h" +#include "LibAHeader.h" + +BumpPtrAllocatorImpl<> &getPreprocessorAllocator(); +class B { + struct ModuleMacroInfo { + ModuleMacroInfo *getModuleInfo() { + return new (getPreprocessorAllocator()) ModuleMacroInfo(); + } + }; +}; + +// expected-no-diagnostics diff --git a/clang/test/OpenMP/metadirective_ast_print.c b/clang/test/OpenMP/metadirective_ast_print.c index d9ff7e7645216..851f08ce37ee7 100644 --- a/clang/test/OpenMP/metadirective_ast_print.c +++ b/clang/test/OpenMP/metadirective_ast_print.c @@ -77,6 +77,34 @@ void foo(void) { : parallel) default(nothing) for (int i = 0; i < 16; i++) ; + +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) otherwise() + for (int i=0; i<10; i++) + ; +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) + for (int 
i=0; i<10; i++) + ; +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) when(implementation = {extension(match_none)} \ + : parallel) default(parallel for) + for (int i=0; i<10; i++) + ; + +#pragma omp metadirective when(user = {condition(1)} \ + : parallel for) otherwise() + for (int i=0; i<10; i++) + ; +#pragma omp metadirective when(user = {condition(1)} \ + : parallel for) + for (int i=0; i<10; i++) + ; +#pragma omp metadirective when(user = {condition(1)} \ + : parallel for) when(implementation = {extension(match_none)} \ + : parallel) default(parallel for) + for (int i=0; i<10; i++) + ; } // CHECK: void bar(void); diff --git a/clang/test/OpenMP/metadirective_otherwise.cpp b/clang/test/OpenMP/metadirective_otherwise.cpp new file mode 100644 index 0000000000000..0533350c84eed --- /dev/null +++ b/clang/test/OpenMP/metadirective_otherwise.cpp @@ -0,0 +1,125 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void func1() { +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) otherwise() + for (int i = 0; i < 100; i++) + ; + +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) + for (int i = 0; i < 100; i++) + ; + +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) \ + when(implementation = {extension(match_none)} \ + : parallel) default(parallel for) + + for (int i = 0; i < 100; i++) + ; + + +} + +// CHECK-LABEL: define dso_local void @_Z5func1v() +// CHECK: entry +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: br label %[[FOR_COND:.*]] +// CHECK: [[FOR_COND]]: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 100 +// CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label 
%[[FOR_END:.*]] +// CHECK: [[FOR_BODY]]: +// CHECK-NEXT: br label %[[FOR_INC:.*]] +// CHECK: [[FOR_INC]]: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: [[FOR_END]]: +// CHECK-NEXT: store i32 0, ptr [[I1]], align 4 +// CHECK-NEXT: br label %[[FOR_COND2:.*]] +// CHECK: [[FOR_COND2]]: +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I1]], align 4 +// CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], 100 +// CHECK-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END7:.*]] +// CHECK: [[FOR_BODY4]]: +// CHECK-NEXT: br label %[[FOR_INC5:.*]] +// CHECK: [[FOR_INC5]]: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I1]], align 4 +// CHECK-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-NEXT: store i32 [[INC6]], ptr [[I1]], align 4 +// CHECK-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK: [[FOR_END7]]: +// CHECK: ret void + +void func2() { +#pragma omp metadirective when(user = {condition(1)} \ + : parallel for) otherwise() + for (int i = 0; i < 100; i++) + ; + +#pragma omp metadirective when(user = {condition(1)} \ + : parallel for) + for (int i = 0; i < 100; i++) + ; +} + +// CHECK-LABEL: define dso_local void @_Z5func2v() +// CHECK: entry +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @_Z5func2v.omp_outlined) +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @_Z5func2v.omp_outlined.1) +// CHECK-NEXT: ret void + + +void func3() { +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) \ + when(implementation = {extension(match_none)} \ + : parallel) default(parallel for) + + for (int i = 0; i < 100; i++) + ; + +} + +// CHECK-LABEL: define dso_local void @_Z5func3v() +// CHECK: entry +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 0, ptr @_Z5func3v.omp_outlined) +// CHECK-NEXT: ret void +// CHECK-NEXT: } + +// CHECK-LABEL: define internal void @_Z5func3v.omp_outlined +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], +// CHECK-SAME: ptr noalias noundef [[DOTBOUND_TID_:%.*]]) +// CHECK-NEXT: entry +// CHECK-NEXT: [[GLOB_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[BOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[GLOB_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[BOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: br label %for.cond +// CHECK:for.cond: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 100 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK:for.body: +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK:for.inc: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK:for.end: +// CHECK-NEXT: ret void +// CHECK-NEXT:} + +#endif diff --git a/clang/test/SemaCUDA/dtor.cu b/clang/test/SemaCUDA/dtor.cu new file mode 100644 index 0000000000000..c266e51f5c29e --- /dev/null +++ b/clang/test/SemaCUDA/dtor.cu @@ -0,0 +1,102 @@ +// RUN: %clang_cc1 %s -std=c++20 -fsyntax-only -verify=host +// RUN: 
%clang_cc1 %s -std=c++20 -fcuda-is-device -fsyntax-only -verify=dev + +// host-no-diagnostics + +#include "Inputs/cuda.h" + +// Virtual dtor ~B() of explicit instantiation B must +// be emitted, which causes host_fun() called. +namespace ExplicitInstantiationExplicitDevDtor { +void host_fun() // dev-note {{'host_fun' declared here}} +{} + +template +constexpr void hd_fun() { + host_fun(); // dev-error {{reference to __host__ function 'host_fun' in __host__ __device__ function}} +} + +struct A { + constexpr ~A() { // dev-note {{called by '~B'}} + hd_fun<8>(); // dev-note {{called by '~A'}} + } +}; + +template +struct B { +public: + virtual __device__ ~B() = default; + A _a; +}; + +template class B; +} + +// The implicit host/device attrs of virtual dtor B::~B() is inferred to +// have implicit device attr since dtors of its members and parent classes can +// be executed on device. This causes a diagnostic since B::~B() must +// be emitted, and it eventually causes host_fun() called on device side. +namespace ExplicitInstantiationDtorNoAttr { +void host_fun() // dev-note {{'host_fun' declared here}} +{} + +template +constexpr void hd_fun() { + host_fun(); // dev-error{{reference to __host__ function 'host_fun' in __host__ __device__ function}} +} + +struct A { + constexpr ~A() { // dev-note {{called by '~B'}} + hd_fun<8>(); // dev-note {{called by '~A'}} + } +}; + +template +struct B { +public: + virtual ~B() = default; + A _a; +}; + +template +struct C { +public: + virtual ~C() = default; +}; + +template class B; +template class C; +__device__ void foo() { + C x; +} +} + +// Dtors of implicit template class instantiation are not +// conservatively inferred because the invalid usage can +// be diagnosed. 
+namespace ImplicitInstantiation { +void host_fun() // dev-note {{'host_fun' declared here}} +{} + +template +constexpr void hd_fun() { + host_fun(); // dev-error {{reference to __host__ function 'host_fun' in __host__ __device__ function}} +} + +struct A { + constexpr ~A() { // dev-note {{called by '~B'}} + hd_fun<8>(); // dev-note {{called by '~A'}} + } +}; + +template +struct B { +public: + ~B() = default; // dev-note {{called by 'foo'}} + A _a; +}; + +__device__ void foo() { + B x; +} +} diff --git a/clang/test/SemaCXX/embed-init-list.cpp b/clang/test/SemaCXX/embed-init-list.cpp new file mode 100644 index 0000000000000..c511ca707a537 --- /dev/null +++ b/clang/test/SemaCXX/embed-init-list.cpp @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -Wno-c23-extensions %s +// expected-no-diagnostics + +namespace std { +typedef decltype(sizeof(int)) size_t; + +template class initializer_list { + const _E *__begin_; + size_t __size_; + + constexpr initializer_list(const _E *__b, size_t __s) + : __begin_(__b), __size_(__s) {} + +public: + constexpr initializer_list() : __begin_(nullptr), __size_(0) {} +}; +} // namespace std + +template struct S { + S(std::initializer_list); +}; + +template <> struct S { + S(std::initializer_list); +}; + +struct S1 { + S data; + int a; +}; + +template void to_array(_Tp (&&__a)[_Nm]) {} + + +template +void tfn(T) {} + +void tests() { + + S{{ +#embed __FILE__ + }}; + + S1 ss{std::initializer_list{ +#embed __FILE__ + }}; + + S sss = { +#embed __FILE__ + }; + + std::initializer_list il{ +#embed __FILE__ + }; + + static constexpr auto initializer_list = std::initializer_list{ +#embed __FILE__ + , '\0'}; + + static constexpr auto intinitializer_list = std::initializer_list{ +#embed __FILE__ + , '\0'}; + + to_array({ +#embed __FILE__ + }); + + tfn>({ +#embed __FILE__ + }); +} diff --git a/clang/test/SemaCXX/unique_object_duplication.h b/clang/test/SemaCXX/unique_object_duplication.h index 861175766db70..e5c63efbf918c 100644 --- 
a/clang/test/SemaCXX/unique_object_duplication.h +++ b/clang/test/SemaCXX/unique_object_duplication.h @@ -165,81 +165,17 @@ namespace GlobalTest { namespace TemplateTest { -template -int disallowedTemplate1 = 0; // hidden-warning {{'disallowedTemplate1' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -template int disallowedTemplate1; // hidden-note {{in instantiation of}} - - -// Should work for implicit instantiation as well -template -int disallowedTemplate2 = 0; // hidden-warning {{'disallowedTemplate2' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -int implicit_instantiate() { - return disallowedTemplate2; // hidden-note {{in instantiation of}} -} - +// We never warn inside templates because it's frequently infeasible to actually +// fix the warning. -// Ensure we only get warnings for templates that are actually instantiated template -int maybeAllowedTemplate = 0; // Not instantiated, so no warning here - -template -int maybeAllowedTemplate = 1; // hidden-warning {{'maybeAllowedTemplate' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -template <> -int maybeAllowedTemplate = 2; // hidden-warning {{'maybeAllowedTemplate' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -template int maybeAllowedTemplate; // hidden-note {{in instantiation of}} +int allowedTemplate1 = 0; - - -// Should work the same for static class members -template -struct S { - static int staticMember; -}; +template int allowedTemplate1; template -int S::staticMember = 0; // Never instantiated +inline int allowedTemplate2 = 0; -// T* specialization -template -struct S { - static int staticMember; -}; - -template -int S::staticMember = 1; // hidden-warning {{'staticMember' may be duplicated when built into a shared library: it is 
mutable, has hidden visibility, and external linkage}} - -template class S; // hidden-note {{in instantiation of}} - -// T& specialization, implicitly instantiated -template -struct S { - static int staticMember; -}; - -template -int S::staticMember = 2; // hidden-warning {{'staticMember' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -int implicit_instantiate2() { - return S::staticMember; // hidden-note {{in instantiation of}} -} - - -// Should work for static locals as well -template -int* wrapper() { - static int staticLocal; // hidden-warning {{'staticLocal' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - return &staticLocal; -} - -template <> -int* wrapper() { - static int staticLocal; // hidden-warning {{'staticLocal' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - return &staticLocal; -} +template int allowedTemplate2; -auto dummy = wrapper(); // hidden-note {{in instantiation of}} } // namespace TemplateTest \ No newline at end of file diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp index 19c3ff49eab27..5031e17188e17 100644 --- a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp @@ -3863,6 +3863,200 @@ TEST_P(UncheckedOptionalAccessTest, ConstBoolAccessorWithModInBetween) { )cc"); } +TEST_P(UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaConstRefAccessorToHoldingObject) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A& getA() const { return a; } + + A a; + }; + + void target(B& b) 
{ + if (b.getA().get().has_value()) { + b.getA().get().value(); + } + } + )cc"); +} + +TEST_P( + UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaConstRefAccessorToHoldingObjectWithoutValueCheck) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A& getA() const { return a; } + + A a; + }; + + void target(B& b) { + b.getA().get().value(); // [[unsafe]] + } + )cc"); +} + +TEST_P(UncheckedOptionalAccessTest, + ConstRefToOptionalSavedAsTemporaryVariable) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A& getA() const { return a; } + + A a; + }; + + void target(B& b) { + const auto& opt = b.getA().get(); + if (opt.has_value()) { + opt.value(); + } + } + )cc"); +} + +TEST_P(UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaAccessorToHoldingObjectByValue) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A copyA() const { return a; } + + A a; + }; + + void target(B& b) { + if (b.copyA().get().has_value()) { + b.copyA().get().value(); // [[unsafe]] + } + } + )cc"); +} + +TEST_P(UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaNonConstRefAccessorToHoldingObject) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + A& getA() { return a; } + + A a; + }; + + void target(B& b) { + if (b.getA().get().has_value()) { + b.getA().get().value(); // [[unsafe]] + } + } + )cc"); +} + +TEST_P( + UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaConstRefAccessorToHoldingObjectWithModAfterCheck) { 
+ ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A& getA() const { return a; } + + A& getA() { return a; } + + void clear() { a = A{}; } + + A a; + }; + + void target(B& b) { + // changing field A via non-const getter after const getter check + if (b.getA().get().has_value()) { + b.getA() = A{}; + b.getA().get().value(); // [[unsafe]] + } + + // calling non-const method which might change field A + if (b.getA().get().has_value()) { + b.clear(); + b.getA().get().value(); // [[unsafe]] + } + } + )cc"); +} + +TEST_P( + UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaConstRefAccessorToHoldingObjectWithAnotherConstCallAfterCheck) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A& getA() const { return a; } + + void callWithoutChanges() const { + // no-op + } + + A a; + }; + + void target(B& b) { + if (b.getA().get().has_value()) { + b.callWithoutChanges(); // calling const method which cannot change A + b.getA().get().value(); + } + } + )cc"); +} + // FIXME: Add support for: // - constructors (copy, move) // - assignment operators (default, copy, move) diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp index 9cd262960b724..273bab87b1ee1 100644 --- a/clang/unittests/Format/ConfigParseTest.cpp +++ b/clang/unittests/Format/ConfigParseTest.cpp @@ -265,9 +265,9 @@ TEST(ConfigParseTest, ParsesConfigurationIntegers) { Style.Language = FormatStyle::LK_Cpp; CHECK_PARSE_INT(AccessModifierOffset); + CHECK_PARSE_INT(BracedInitializerIndentWidth); CHECK_PARSE_INT(PPIndentWidth); - CHECK_PARSE_UNSIGNED(BracedInitializerIndentWidth); CHECK_PARSE_UNSIGNED(ColumnLimit); CHECK_PARSE_UNSIGNED(ConstructorInitializerIndentWidth); 
CHECK_PARSE_UNSIGNED(ContinuationIndentWidth); @@ -1441,8 +1441,10 @@ TEST(ConfigParseTest, GetStyleOfFile) { ASSERT_EQ(*Style9, SubSubStyle); // Test 9.8: use inheritance from a file without BasedOnStyle - ASSERT_TRUE(FS.addFile("/e/withoutbase/.clang-format", 0, - llvm::MemoryBuffer::getMemBuffer("ColumnLimit: 123"))); + ASSERT_TRUE(FS.addFile( + "/e/withoutbase/.clang-format", 0, + llvm::MemoryBuffer::getMemBuffer("BracedInitializerIndentWidth: 2\n" + "ColumnLimit: 123"))); ASSERT_TRUE( FS.addFile("/e/withoutbase/sub/.clang-format", 0, llvm::MemoryBuffer::getMemBuffer( @@ -1452,6 +1454,7 @@ TEST(ConfigParseTest, GetStyleOfFile) { ASSERT_TRUE(static_cast(Style9)); ASSERT_EQ(*Style9, [] { auto Style = getLLVMStyle(); + Style.BracedInitializerIndentWidth = 2; Style.ColumnLimit = 123; return Style; }()); @@ -1460,6 +1463,7 @@ TEST(ConfigParseTest, GetStyleOfFile) { ASSERT_TRUE(static_cast(Style9)); ASSERT_EQ(*Style9, [] { auto Style = getLLVMStyle(); + Style.BracedInitializerIndentWidth = 2; Style.ColumnLimit = 123; Style.IndentWidth = 7; return Style; diff --git a/clang/unittests/Frontend/CMakeLists.txt b/clang/unittests/Frontend/CMakeLists.txt index 0f05813338f2a..3c94846243870 100644 --- a/clang/unittests/Frontend/CMakeLists.txt +++ b/clang/unittests/Frontend/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_unittest(FrontendTests FixedPointString.cpp FrontendActionTest.cpp CodeGenActionTest.cpp + NoAlterCodeGenActionTest.cpp ParsedSourceLocationTest.cpp PCHPreambleTest.cpp ReparseWorkingDirTest.cpp @@ -27,4 +28,5 @@ clang_target_link_libraries(FrontendTests clangCodeGen clangFrontendTool clangSerialization + clangTooling ) diff --git a/clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp b/clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp new file mode 100644 index 0000000000000..fed2d255a9fe8 --- /dev/null +++ b/clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp @@ -0,0 +1,197 @@ +//===- unittests/Frontend/NoAlterCodeGenActionTest.cpp --------------------===// 
+// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Unit tests for CodeGenAction may not alter the AST. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/LangStandard.h" +#include "clang/CodeGen/BackendUtil.h" +#include "clang/CodeGen/CodeGenAction.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/MultiplexConsumer.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; +using namespace clang::frontend; +using namespace clang::tooling; + +namespace { + +class ASTChecker : public RecursiveASTVisitor { +public: + ASTContext &Ctx; + ASTChecker(ASTContext &Ctx) : Ctx(Ctx) {} + bool VisitReturnStmt(ReturnStmt *RS) { + EXPECT_TRUE(RS->getRetValue()); + return true; + } + + bool VisitCoroutineBodyStmt(CoroutineBodyStmt *CS) { + return VisitReturnStmt(cast(CS->getReturnStmt())); + } +}; + +class ASTCheckerConsumer : public ASTConsumer { +public: + void HandleTranslationUnit(ASTContext &Ctx) override { + ASTChecker Checker(Ctx); + Checker.TraverseAST(Ctx); + } +}; + +class TestCodeGenAction : public EmitLLVMOnlyAction { +public: + using Base = EmitLLVMOnlyAction; + using Base::Base; + + std::unique_ptr CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) override { + std::vector> Consumers; + Consumers.push_back(std::make_unique()); + Consumers.push_back(Base::CreateASTConsumer(CI, InFile)); + return std::make_unique(std::move(Consumers)); + } +}; + +const char 
*test_contents = R"cpp( + +namespace std { + +template struct coroutine_traits { + using promise_type = typename R::promise_type; +}; + +template struct coroutine_handle; + +template <> struct coroutine_handle { + static coroutine_handle from_address(void *addr) noexcept; + void operator()() { resume(); } + void *address() const noexcept; + void resume() const { __builtin_coro_resume(ptr); } + void destroy() const { __builtin_coro_destroy(ptr); } + bool done() const; + coroutine_handle &operator=(decltype(nullptr)); + coroutine_handle(decltype(nullptr)) : ptr(nullptr) {} + coroutine_handle() : ptr(nullptr) {} +// void reset() { ptr = nullptr; } // add to P0057? + explicit operator bool() const; + +protected: + void *ptr; +}; + +template struct coroutine_handle : coroutine_handle<> { + using coroutine_handle<>::operator=; + + static coroutine_handle from_address(void *addr) noexcept; + + Promise &promise() const; + static coroutine_handle from_promise(Promise &promise); +}; + +template +bool operator==(coroutine_handle<_PromiseT> const &_Left, + coroutine_handle<_PromiseT> const &_Right) noexcept { + return _Left.address() == _Right.address(); +} + +template +bool operator!=(coroutine_handle<_PromiseT> const &_Left, + coroutine_handle<_PromiseT> const &_Right) noexcept { + return !(_Left == _Right); +} + +struct noop_coroutine_promise {}; + +template <> +struct coroutine_handle { + operator coroutine_handle<>() const noexcept; + + constexpr explicit operator bool() const noexcept { return true; } + constexpr bool done() const noexcept { return false; } + + constexpr void operator()() const noexcept {} + constexpr void resume() const noexcept {} + constexpr void destroy() const noexcept {} + + noop_coroutine_promise &promise() const noexcept { + return *static_cast( + __builtin_coro_promise(this->__handle_, alignof(noop_coroutine_promise), false)); + } + + constexpr void *address() const noexcept { return __handle_; } + +private: + friend coroutine_handle 
noop_coroutine() noexcept; + + coroutine_handle() noexcept { + this->__handle_ = __builtin_coro_noop(); + } + + void *__handle_ = nullptr; +}; + +using noop_coroutine_handle = coroutine_handle; + +inline noop_coroutine_handle noop_coroutine() noexcept { return noop_coroutine_handle(); } + +struct suspend_always { + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; +struct suspend_never { + bool await_ready() noexcept { return true; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +} // namespace std + +using namespace std; + +class invoker { +public: + class invoker_promise { + public: + invoker get_return_object() { return invoker{}; } + auto initial_suspend() { return suspend_always{}; } + auto final_suspend() noexcept { return suspend_always{}; } + void return_void() {} + void unhandled_exception() {} + }; + using promise_type = invoker_promise; + invoker() {} + invoker(const invoker &) = delete; + invoker &operator=(const invoker &) = delete; + invoker(invoker &&) = delete; + invoker &operator=(invoker &&) = delete; +}; + +invoker g() { + co_return; +} + +)cpp"; + +TEST(CodeGenTest, TestNonAlterTest) { + EXPECT_TRUE(runToolOnCodeWithArgs(std::make_unique(), + test_contents, + { + "-std=c++20", + })); +} +} // namespace diff --git a/compiler-rt/lib/asan/asan_win.cpp b/compiler-rt/lib/asan/asan_win.cpp index 09a13b11cff1f..027340280e068 100644 --- a/compiler-rt/lib/asan/asan_win.cpp +++ b/compiler-rt/lib/asan/asan_win.cpp @@ -145,7 +145,6 @@ static thread_return_t THREAD_CALLING_CONV asan_thread_start(void *arg) { t->GetStartData(params); auto res = (*params.start_routine)(params.arg); - t->Destroy(); // POSIX calls this from TSD destructor. 
return res; } @@ -166,6 +165,13 @@ INTERCEPTOR_WINAPI(HANDLE, CreateThread, LPSECURITY_ATTRIBUTES security, thr_flags, tid); } +INTERCEPTOR_WINAPI(void, ExitThread, DWORD dwExitCode) { + AsanThread *t = (AsanThread *)__asan::GetCurrentThread(); + if (t) + t->Destroy(); + REAL(ExitThread)(dwExitCode); +} + // }}} namespace __asan { @@ -181,6 +187,7 @@ void InitializePlatformInterceptors() { (LPCWSTR)&InitializePlatformInterceptors, &pinned)); ASAN_INTERCEPT_FUNC(CreateThread); + ASAN_INTERCEPT_FUNC(ExitThread); ASAN_INTERCEPT_FUNC(SetUnhandledExceptionFilter); #ifdef _WIN64 diff --git a/compiler-rt/lib/tsan/go/buildgo.sh b/compiler-rt/lib/tsan/go/buildgo.sh index 6871b36c3f510..d9e56402ad48f 100755 --- a/compiler-rt/lib/tsan/go/buildgo.sh +++ b/compiler-rt/lib/tsan/go/buildgo.sh @@ -112,6 +112,12 @@ if [ "$GOOS" = "linux" ]; then ARCHCFLAGS="-mips64 -EL" elif [ "$GOARCH" = "mips64" ]; then ARCHCFLAGS="-mips64 -EB" + elif [ "$GOARCH" = "riscv64" ]; then + if [ "$GORISCV64" = "rva23u64" ]; then + ARCHCFLAGS="-march=rv64gcv" + else + ARCHCFLAGS="-march=rv64gc" + fi elif [ "$GOARCH" = "s390x" ]; then SRCS="$SRCS ../../sanitizer_common/sanitizer_linux_s390.cpp" ARCHCFLAGS="" diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h index 377f8aeb8d66e..354f6da6a64a1 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform.h +++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h @@ -681,6 +681,33 @@ struct MappingGoMips64_47 { static const uptr kShadowAdd = 0x200000000000ull; }; +/* Go on linux/riscv64 (48-bit VMA) +0000 0001 0000 - 00e0 0000 0000: executable and heap (896 GiB) +00e0 0000 0000 - 2000 0000 0000: - +2000 0000 0000 - 2400 0000 0000: shadow - 4 TiB ( ~ 4 * app) +2400 0000 0000 - 3000 0000 0000: - +3000 0000 0000 - 3100 0000 0000: metainfo - 1 TiB ( ~ 1 * app) +3100 0000 0000 - 8000 0000 0000: - +*/ +struct MappingGoRiscv64 { + static const uptr kMetaShadowBeg = 0x300000000000ull; + static const uptr kMetaShadowEnd = 
0x310000000000ull; + static const uptr kShadowBeg = 0x200000000000ull; + static const uptr kShadowEnd = 0x240000000000ull; + static const uptr kLoAppMemBeg = 0x000000010000ull; + static const uptr kLoAppMemEnd = 0x000e00000000ull; + static const uptr kMidAppMemBeg = 0; + static const uptr kMidAppMemEnd = 0; + static const uptr kHiAppMemBeg = 0; + static const uptr kHiAppMemEnd = 0; + static const uptr kHeapMemBeg = 0; + static const uptr kHeapMemEnd = 0; + static const uptr kVdsoBeg = 0; + static const uptr kShadowMsk = 0; + static const uptr kShadowXor = 0; + static const uptr kShadowAdd = 0x200000000000ull; +}; + /* Go on linux/s390x 0000 0000 1000 - 1000 0000 0000: executable and heap - 16 TiB @@ -728,6 +755,8 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) { return Func::template Apply(arg); # elif defined(__loongarch_lp64) return Func::template Apply(arg); +# elif SANITIZER_RISCV64 + return Func::template Apply(arg); # elif SANITIZER_WINDOWS return Func::template Apply(arg); # else @@ -798,6 +827,7 @@ void ForEachMapping() { Func::template Apply(); Func::template Apply(); Func::template Apply(); + Func::template Apply(); Func::template Apply(); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp index 3e08a1bece98f..373acd3d95d01 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -373,6 +373,12 @@ void InitializePlatformEarly() { Printf("FATAL: Found %zd - Supported 39 and 48\n", vmaSize); Die(); } +# else + if (vmaSize != 48) { + Printf("FATAL: ThreadSanitizer: unsupported VMA range\n"); + Printf("FATAL: Found %zd - Supported 48\n", vmaSize); + Die(); + } # endif # endif diff --git a/cross-project-tests/amdgpu/builtins-amdgcn-swmmac-w32.cl b/cross-project-tests/amdgpu/builtins-amdgcn-swmmac-w32.cl index 317d9a1102ccf..e6adc7bea525c 100644 --- a/cross-project-tests/amdgpu/builtins-amdgcn-swmmac-w32.cl +++ 
b/cross-project-tests/amdgpu/builtins-amdgcn-swmmac-w32.cl @@ -15,7 +15,7 @@ typedef short v16s __attribute__((ext_vector_type(16))); // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_f16_w32: // CHECK-GFX1200: v_swmmac_f32_16x16x32_f16 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a, b, c, index); } @@ -24,7 +24,7 @@ void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_bf16_w32: // CHECK-GFX1200: v_swmmac_f32_16x16x32_bf16 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32(a, b, c, index); } @@ -33,7 +33,7 @@ void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8 // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f16_16x16x32_f16_w32: // CHECK-GFX1200: v_swmmac_f16_16x16x32_f16 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, short index) +void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, int index) { *out = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32(a, b, c, index); } @@ -42,7 +42,7 @@ void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_bf16_16x16x32_bf16_w32: // CHECK-GFX1200: v_swmmac_bf16_16x16x32_bf16 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} 
// -void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, short index) +void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, int index) { *out = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32(a, b, c, index); } @@ -51,7 +51,7 @@ void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_i32_16x16x32_iu8_w32: // CHECK-GFX1200: v_swmmac_i32_16x16x32_iu8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} neg_lo:[1,1,0] clamp // -void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32(true, a, true, b, c, index, true); } @@ -60,7 +60,7 @@ void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_i32_16x16x32_iu4_w32: // CHECK-GFX1200: v_swmmac_i32_16x16x32_iu4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} neg_lo:[1,1,0] clamp // -void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32(true, a, true, b, c, index, true); } @@ -69,7 +69,7 @@ void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_i32_16x16x64_iu4_w32: // CHECK-GFX1200: v_swmmac_i32_16x16x64_iu4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} neg_lo:[1,1,0] clamp // -void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, int index) { *out = 
__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32(true, a, true, b, c, index, true); } @@ -78,7 +78,7 @@ void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32: // CHECK-GFX1200: v_swmmac_f32_16x16x32_fp8_fp8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(a, b, c, index); } @@ -87,7 +87,7 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32: // CHECK-GFX1200: v_swmmac_f32_16x16x32_fp8_bf8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(a, b, c, index); } @@ -96,7 +96,7 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32: // CHECK-GFX1200: v_swmmac_f32_16x16x32_bf8_fp8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(a, b, c, index); } @@ -104,7 +104,7 @@ void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32: // CHECK-GFX1200: 
v_swmmac_f32_16x16x32_bf8_bf8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(a, b, c, index); } diff --git a/flang-rt/lib/runtime/unit.cpp b/flang-rt/lib/runtime/unit.cpp index 1d4d54ae01956..43501aeb48458 100644 --- a/flang-rt/lib/runtime/unit.cpp +++ b/flang-rt/lib/runtime/unit.cpp @@ -511,7 +511,7 @@ void ExternalFileUnit::EndIoStatement() { void ExternalFileUnit::BeginSequentialVariableUnformattedInputRecord( IoErrorHandler &handler) { RUNTIME_CHECK(handler, access == Access::Sequential); - std::int32_t header{0}, footer{0}; + std::uint32_t header{0}, footer{0}; std::size_t need{recordOffsetInFrame_ + sizeof header}; std::size_t got{ReadFrame(frameOffsetInFile_, need, handler)}; // Try to emit informative errors to help debug corrupted files. @@ -528,17 +528,41 @@ void ExternalFileUnit::BeginSequentialVariableUnformattedInputRecord( recordLength = sizeof header + header; // does not include footer need = recordOffsetInFrame_ + *recordLength + sizeof footer; got = ReadFrame(frameOffsetInFile_, need, handler); - if (got < need) { + if (got >= need) { + footer = ReadHeaderOrFooter(recordOffsetInFrame_ + *recordLength); + } + if (frameOffsetInFile_ == 0 && recordOffsetInFrame_ == 0 && + (got < need || footer != header)) { + // Maybe an omitted or incorrect byte swap flag setting? + // Try it the other way, since this is the first record. + // (N.B. Won't work on files starting with empty records, but there's + // no good way to know later if all preceding records were empty.) 
+ swapEndianness_ = !swapEndianness_; + std::uint32_t header2{ReadHeaderOrFooter(0)}; + std::size_t recordLength2{sizeof header2 + header2}; + std::size_t need2{recordLength2 + sizeof footer}; + std::size_t got2{ReadFrame(0, need2, handler)}; + if (got2 >= need2) { + std::uint32_t footer2{ReadHeaderOrFooter(recordLength2)}; + if (footer2 == header2) { + error = "Unformatted variable-length sequential file input " + "failed on the first record, probably due to a need " + "for byte order data conversion; consider adding " + "CONVERT='SWAP' to the OPEN statement or adding " + "FORT_CONVERT=SWAP to the execution environment"; + } + } + swapEndianness_ = !swapEndianness_; + } + if (error) { + } else if (got < need) { error = "Unformatted variable-length sequential file input failed at " "record #%jd (file offset %jd): hit EOF reading record with " "length %jd bytes"; - } else { - footer = ReadHeaderOrFooter(recordOffsetInFrame_ + *recordLength); - if (footer != header) { - error = "Unformatted variable-length sequential file input failed at " - "record #%jd (file offset %jd): record header has length %jd " - "that does not match record footer (%jd)"; - } + } else if (footer != header) { + error = "Unformatted variable-length sequential file input failed at " + "record #%jd (file offset %jd): record header has length %jd " + "that does not match record footer (%jd)"; } } if (error) { @@ -590,7 +614,7 @@ void ExternalFileUnit::BackspaceFixedRecord(IoErrorHandler &handler) { void ExternalFileUnit::BackspaceVariableUnformattedRecord( IoErrorHandler &handler) { - std::int32_t header{0}; + std::uint32_t header{0}; auto headerBytes{static_cast(sizeof header)}; frameOffsetInFile_ += recordOffsetInFrame_; recordOffsetInFrame_ = 0; @@ -775,8 +799,8 @@ void ExternalFileUnit::PopChildIo(ChildIo &child) { child_.reset(child.AcquirePrevious().release()); // deletes top child } -std::int32_t ExternalFileUnit::ReadHeaderOrFooter(std::int64_t frameOffset) { - std::int32_t word; 
+std::uint32_t ExternalFileUnit::ReadHeaderOrFooter(std::int64_t frameOffset) { + std::uint32_t word; char *wordPtr{reinterpret_cast(&word)}; std::memcpy(wordPtr, Frame() + frameOffset, sizeof word); if (swapEndianness_) { diff --git a/flang-rt/lib/runtime/unit.h b/flang-rt/lib/runtime/unit.h index eb762a2d3b235..bb3d3650da34b 100644 --- a/flang-rt/lib/runtime/unit.h +++ b/flang-rt/lib/runtime/unit.h @@ -210,7 +210,7 @@ class ExternalFileUnit : public ConnectionState, RT_API_ATTRS void CommitWrites(); RT_API_ATTRS bool CheckDirectAccess(IoErrorHandler &); RT_API_ATTRS void HitEndOnRead(IoErrorHandler &); - RT_API_ATTRS std::int32_t ReadHeaderOrFooter(std::int64_t frameOffset); + RT_API_ATTRS std::uint32_t ReadHeaderOrFooter(std::int64_t frameOffset); Lock lock_; diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index e70f40306c4e1..d781dee75e07e 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -218,7 +218,7 @@ end the length parameter of the implicit type, not the first. * Outside a character literal, a comment after a continuation marker (&) need not begin with a comment marker (!). -* Classic C-style /*comments*/ are skipped, so multi-language header +* Classic C-style `/*comments*/` are skipped, so multi-language header files are easier to write and use. * $ and \ edit descriptors are supported in FORMAT to suppress newline output on user prompts. diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index 352f6b36458ce..f94981011b6e5 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -1417,8 +1417,8 @@ inline bool IsAssumedSizeArray(const Symbol &symbol) { // In a SELECT RANK construct, ResolveAssociations() stops at a // RANK(n) or RANK(*) case symbol, but traverses the selector for // RANK DEFAULT. 
-const Symbol &ResolveAssociations(const Symbol &); -const Symbol &GetAssociationRoot(const Symbol &); +const Symbol &ResolveAssociations(const Symbol &, bool stopAtTypeGuard = false); +const Symbol &GetAssociationRoot(const Symbol &, bool stopAtTypeGuard = false); const Symbol *FindCommonBlockContaining(const Symbol &); int CountLenParameters(const DerivedTypeSpec &); diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h index a21f8bbe17685..ed301016ad01c 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.h +++ b/flang/include/flang/Optimizer/Dialect/FIROps.h @@ -50,9 +50,95 @@ struct DebuggingResource mlir::StringRef getName() final { return "DebuggingResource"; } }; +class CoordinateIndicesAdaptor; +using IntOrValue = llvm::PointerUnion; + } // namespace fir #define GET_OP_CLASSES #include "flang/Optimizer/Dialect/FIROps.h.inc" +namespace fir { +class CoordinateIndicesAdaptor { +public: + using value_type = IntOrValue; + + CoordinateIndicesAdaptor(mlir::DenseI32ArrayAttr fieldIndices, + mlir::ValueRange values) + : fieldIndices(fieldIndices), values(values) {} + + value_type operator[](size_t index) const { + assert(index < size() && "index out of bounds"); + return *std::next(begin(), index); + } + + size_t size() const { + return fieldIndices ? 
fieldIndices.size() : values.size(); + } + + bool empty() const { + return values.empty() && (!fieldIndices || fieldIndices.empty()); + } + + class iterator + : public llvm::iterator_facade_base { + public: + iterator(const CoordinateIndicesAdaptor *base, + std::optional::iterator> fieldIter, + llvm::detail::IterOfRange valuesIter) + : base(base), fieldIter(fieldIter), valuesIter(valuesIter) {} + + value_type operator*() const { + if (fieldIter && **fieldIter != fir::CoordinateOp::kDynamicIndex) { + return mlir::IntegerAttr::get(base->fieldIndices.getElementType(), + **fieldIter); + } + return *valuesIter; + } + + iterator &operator++() { + if (fieldIter) { + if (**fieldIter == fir::CoordinateOp::kDynamicIndex) + valuesIter++; + (*fieldIter)++; + } else { + valuesIter++; + } + return *this; + } + + bool operator==(const iterator &rhs) const { + return base == rhs.base && fieldIter == rhs.fieldIter && + valuesIter == rhs.valuesIter; + } + + private: + const CoordinateIndicesAdaptor *base; + std::optional::const_iterator> fieldIter; + llvm::detail::IterOfRange valuesIter; + }; + + iterator begin() const { + std::optional::const_iterator> fieldIter; + if (fieldIndices) + fieldIter = fieldIndices.asArrayRef().begin(); + return iterator(this, fieldIter, values.begin()); + } + + iterator end() const { + std::optional::const_iterator> fieldIter; + if (fieldIndices) + fieldIter = fieldIndices.asArrayRef().end(); + return iterator(this, fieldIter, values.end()); + } + +private: + mlir::DenseI32ArrayAttr fieldIndices; + mlir::ValueRange values; +}; + +} // namespace fir + #endif // FORTRAN_OPTIMIZER_DIALECT_FIROPS_H diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 8dbc9df9f553d..c83c57186b46d 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -1748,10 +1748,16 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoMemoryEffect]> { Unlike LLVM's 
GEP instruction, one cannot stride over the outermost reference; therefore, the leading 0 index must be omitted. + This operation can be used to index derived type fields, in which case + the operand is the name of the index field. + ``` %i = ... : index %h = ... : !fir.heap> %p = fir.coordinate_of %h, %i : (!fir.heap>, index) -> !fir.ref + + %d = ... : !fir.ref> + %f = fir.coordinate_of %d, field2 : (!fir.ref>) -> !fir.ref ``` In the example, `%p` will be a pointer to the `%i`-th f32 value in the @@ -1761,7 +1767,8 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoMemoryEffect]> { let arguments = (ins AnyRefOrBox:$ref, Variadic:$coor, - TypeAttr:$baseType + TypeAttr:$baseType, + OptionalAttr:$field_indices ); let results = (outs RefOrLLVMPtr); @@ -1771,10 +1778,14 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoMemoryEffect]> { let builders = [ OpBuilder<(ins "mlir::Type":$resultType, - "mlir::Value":$ref, "mlir::ValueRange":$coor), - [{ return build($_builder, $_state, resultType, ref, coor, - mlir::TypeAttr::get(ref.getType())); }]>, + "mlir::Value":$ref, "mlir::ValueRange":$coor)>, + OpBuilder<(ins "mlir::Type":$resultType, + "mlir::Value":$ref, "llvm::ArrayRef":$coor)> ]; + let extraClassDeclaration = [{ + constexpr static int32_t kDynamicIndex = std::numeric_limits::min(); + CoordinateIndicesAdaptor getIndices(); + }]; } def fir_ExtractValueOp : fir_OneResultOp<"extract_value", [NoMemoryEffect]> { diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 4ae2775c0f849..715811885c219 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -329,9 +329,11 @@ class AssocEntityDetails : public EntityDetails { } bool IsAssumedSize() const { return rank_.value_or(0) == isAssumedSize; } bool IsAssumedRank() const { return rank_.value_or(0) == isAssumedRank; } + bool isTypeGuard() const { return isTypeGuard_; } void set_rank(int rank); void set_IsAssumedSize(); void 
set_IsAssumedRank(); + void set_isTypeGuard(bool yes = true); private: MaybeExpr expr_; @@ -340,6 +342,7 @@ class AssocEntityDetails : public EntityDetails { static constexpr int isAssumedSize{-1}; // RANK(*) static constexpr int isAssumedRank{-2}; // RANK DEFAULT std::optional rank_; + bool isTypeGuard_{false}; // TYPE IS or CLASS IS, but not CLASS(DEFAULT) }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const AssocEntityDetails &); diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 56dcfa88ad92d..16fd8d158b0e0 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -631,6 +631,8 @@ using PotentialAndPointerComponentIterator = // dereferenced. PotentialComponentIterator::const_iterator FindEventOrLockPotentialComponent( const DerivedTypeSpec &, bool ignoreCoarrays = false); +PotentialComponentIterator::const_iterator FindCoarrayPotentialComponent( + const DerivedTypeSpec &); UltimateComponentIterator::const_iterator FindCoarrayUltimateComponent( const DerivedTypeSpec &); UltimateComponentIterator::const_iterator FindPointerUltimateComponent( diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h index 44ba6428e6c93..356623c643e46 100644 --- a/flang/include/flang/Support/Fortran-features.h +++ b/flang/include/flang/Support/Fortran-features.h @@ -74,7 +74,8 @@ ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable, IndexVarRedefinition, IncompatibleImplicitInterfaces, VectorSubscriptFinalization, UndefinedFunctionResult, UselessIomsg, MismatchingDummyProcedure, SubscriptedEmptyArray, UnsignedLiteralTruncation, - CompatibleDeclarationsFromDistinctModules) + CompatibleDeclarationsFromDistinctModules, + NullActualForDefaultIntentAllocatable) using LanguageFeatures = EnumSet; using UsageWarnings = EnumSet; diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index 
6d8f19388d8b7..e55a22dce8e99 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -421,6 +421,8 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"cos", {{"x", SameFloating}}, SameFloating}, {"cosd", {{"x", SameFloating}}, SameFloating}, {"cosh", {{"x", SameFloating}}, SameFloating}, + {"coshape", {{"coarray", AnyData, Rank::coarray}, SizeDefaultKIND}, KINDInt, + Rank::vector, IntrinsicClass::inquiryFunction}, {"count", {{"mask", AnyLogical, Rank::array}, OptionalDIM, DefaultingKIND}, KINDInt, Rank::dimReduced, IntrinsicClass::transformationalFunction}, {"cshift", @@ -1054,8 +1056,6 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"__builtin_numeric_storage_size", {}, DefaultInt}, }; -// TODO: Coarray intrinsic functions -// COSHAPE // TODO: Non-standard intrinsic functions // SHIFT, // COMPL, EQV, NEQV, INT8, JINT, JNINT, KNINT, diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 16b0260719097..36b7d0a69d2ba 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1540,10 +1540,12 @@ bool CheckForCoindexedObject(parser::ContextualMessages &messages, namespace Fortran::semantics { -const Symbol &ResolveAssociations(const Symbol &original) { +const Symbol &ResolveAssociations( + const Symbol &original, bool stopAtTypeGuard) { const Symbol &symbol{original.GetUltimate()}; if (const auto *details{symbol.detailsIf()}) { - if (!details->rank()) { // Not RANK(n) or RANK(*) + if (!details->rank() /* not RANK(n) or RANK(*) */ && + !(stopAtTypeGuard && details->isTypeGuard())) { if (const Symbol * nested{UnwrapWholeSymbolDataRef(details->expr())}) { return ResolveAssociations(*nested); } @@ -1567,8 +1569,8 @@ static const Symbol *GetAssociatedVariable(const AssocEntityDetails &details) { return nullptr; } -const Symbol &GetAssociationRoot(const Symbol &original) { - const Symbol &symbol{ResolveAssociations(original)}; +const Symbol &GetAssociationRoot(const 
Symbol &original, bool stopAtTypeGuard) { + const Symbol &symbol{ResolveAssociations(original, stopAtTypeGuard)}; if (const auto *details{symbol.detailsIf()}) { if (const Symbol * root{GetAssociatedVariable(*details)}) { return *root; @@ -1812,7 +1814,11 @@ bool IsSaved(const Symbol &original) { } else if (scopeKind == Scope::Kind::DerivedType) { return false; // this is a component } else if (symbol.attrs().test(Attr::SAVE)) { - return true; // explicit SAVE attribute + // explicit or implied SAVE attribute + // N.B.: semantics sets implied SAVE for main program + // local variables whose derived types have coarray + // potential subobject components. + return true; } else if (IsDummy(symbol) || IsFunctionResult(symbol) || IsAutomatic(symbol) || IsNamedConstant(symbol)) { return false; diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index fa1975dac789b..48bcf492fd368 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -354,14 +354,12 @@ mlir::Value createParentSymAndGenIntermediateMaps( // type. if (fir::RecordType recordType = mlir::dyn_cast( fir::unwrapPassByRefType(curValue.getType()))) { - mlir::Value idxConst = firOpBuilder.createIntegerConstant( - clauseLocation, firOpBuilder.getIndexType(), - indices[currentIndicesIdx]); - mlir::Type memberTy = - recordType.getTypeList().at(indices[currentIndicesIdx]).second; + fir::IntOrValue idxConst = mlir::IntegerAttr::get( + firOpBuilder.getI32Type(), indices[currentIndicesIdx]); + mlir::Type memberTy = recordType.getType(indices[currentIndicesIdx]); curValue = firOpBuilder.create( clauseLocation, firOpBuilder.getRefType(memberTy), curValue, - idxConst); + llvm::SmallVector{idxConst}); // Skip mapping and the subsequent load if we're the final member or not // a type with a descriptor such as a pointer/allocatable. 
If we're a diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp index 26f4aee21d8bd..82b11ad7db32a 100644 --- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp +++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp @@ -348,8 +348,9 @@ class BoxedProcedurePass rewriter.setInsertionPoint(coor); auto toTy = typeConverter.convertType(ty); auto toBaseTy = typeConverter.convertType(baseTy); - rewriter.replaceOpWithNewOp(coor, toTy, coor.getRef(), - coor.getCoor(), toBaseTy); + rewriter.replaceOpWithNewOp( + coor, toTy, coor.getRef(), coor.getCoor(), toBaseTy, + coor.getFieldIndicesAttr()); opIsValid = false; } } else if (auto index = mlir::dyn_cast(op)) { diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index aaefe675730e1..a2743edd7844a 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -2653,57 +2653,78 @@ struct CoordinateOpConversion return mlir::isa(type); } - /// Check whether this form of `!fir.coordinate_of` is supported. These - /// additional checks are required, because we are not yet able to convert - /// all valid forms of `!fir.coordinate_of`. - /// TODO: Either implement the unsupported cases or extend the verifier - /// in FIROps.cpp instead. 
- static bool supportedCoordinate(mlir::Type type, mlir::ValueRange coors) { - const std::size_t numOfCoors = coors.size(); - std::size_t i = 0; - bool subEle = false; - bool ptrEle = false; - for (; i < numOfCoors; ++i) { - mlir::Value nxtOpnd = coors[i]; - if (auto arrTy = mlir::dyn_cast(type)) { - subEle = true; - i += arrTy.getDimension() - 1; - type = arrTy.getEleTy(); - } else if (auto recTy = mlir::dyn_cast(type)) { - subEle = true; - type = recTy.getType(getFieldNumber(recTy, nxtOpnd)); - } else if (auto tupTy = mlir::dyn_cast(type)) { - subEle = true; - type = tupTy.getType(getConstantIntValue(nxtOpnd)); - } else { - ptrEle = true; - } - } - if (ptrEle) - return (!subEle) && (numOfCoors == 1); - return subEle && (i >= numOfCoors); - } + // Helper structure to analyze the CoordinateOp path and decide if and how + // the GEP should be generated for it. + struct ShapeAnalysis { + bool hasKnownShape; + bool columnIsDeferred; + }; /// Walk the abstract memory layout and determine if the path traverses any /// array types with unknown shape. Return true iff all the array types have a /// constant shape along the path. - static bool arraysHaveKnownShape(mlir::Type type, mlir::ValueRange coors) { - for (std::size_t i = 0, sz = coors.size(); i < sz; ++i) { - mlir::Value nxtOpnd = coors[i]; + /// TODO: move the verification logic into the verifier. 
+ static std::optional + arraysHaveKnownShape(mlir::Type type, fir::CoordinateOp coor) { + fir::CoordinateIndicesAdaptor indices = coor.getIndices(); + auto begin = indices.begin(); + bool hasKnownShape = true; + bool columnIsDeferred = false; + for (auto it = begin, end = indices.end(); it != end;) { if (auto arrTy = mlir::dyn_cast(type)) { - if (fir::sequenceWithNonConstantShape(arrTy)) - return false; - i += arrTy.getDimension() - 1; + bool addressingStart = (it == begin); + unsigned arrayDim = arrTy.getDimension(); + for (auto dimExtent : llvm::enumerate(arrTy.getShape())) { + if (dimExtent.value() == fir::SequenceType::getUnknownExtent()) { + hasKnownShape = false; + if (addressingStart && dimExtent.index() + 1 == arrayDim) { + // If this point was reached, the rows of the first array have + // constant extents. + columnIsDeferred = true; + } else { + // One of the array dimensions that is not the column of the first + // array has dynamic extent. It will not be possible to do + // code generation for the CoordinateOp if the base is not a + // fir.box containing the value of that extent. + return ShapeAnalysis{false, false}; + } + } + // There may be fewer operands than the array size if the + // fir.coordinate_of result is not an element but a sub-array. 
+ if (it != end) + ++it; + } type = arrTy.getEleTy(); - } else if (auto strTy = mlir::dyn_cast(type)) { - type = strTy.getType(getFieldNumber(strTy, nxtOpnd)); + continue; + } + if (auto strTy = mlir::dyn_cast(type)) { + auto intAttr = llvm::dyn_cast(*it); + if (!intAttr) { + mlir::emitError(coor.getLoc(), + "expected field name in fir.coordinate_of"); + return std::nullopt; + } + type = strTy.getType(intAttr.getInt()); } else if (auto strTy = mlir::dyn_cast(type)) { - type = strTy.getType(getConstantIntValue(nxtOpnd)); - } else { - return true; + auto value = llvm::dyn_cast(*it); + if (!value) { + mlir::emitError( + coor.getLoc(), + "expected constant value to address tuple in fir.coordinate_of"); + return std::nullopt; + } + type = strTy.getType(getConstantIntValue(value)); + } else if (auto charType = mlir::dyn_cast(type)) { + // Addressing character in string. Fortran strings degenerate to arrays + // in LLVM, so they are handled like arrays of characters here. + if (charType.getLen() == fir::CharacterType::unknownLen()) + return ShapeAnalysis{false, true}; + type = fir::CharacterType::getSingleton(charType.getContext(), + charType.getFKind()); } + ++it; } - return true; + return ShapeAnalysis{hasKnownShape, columnIsDeferred}; } private: @@ -2754,9 +2775,11 @@ struct CoordinateOpConversion mlir::LLVM::IntegerOverflowFlags nsw = mlir::LLVM::IntegerOverflowFlags::nsw; - for (unsigned i = 1, last = operands.size(); i < last; ++i) { + int nextIndexValue = 1; + fir::CoordinateIndicesAdaptor indices = coor.getIndices(); + for (auto it = indices.begin(), end = indices.end(); it != end;) { if (auto arrTy = mlir::dyn_cast(cpnTy)) { - if (i != 1) + if (it != indices.begin()) TODO(loc, "fir.array nested inside other array and/or derived type"); // Applies byte strides from the box. Ignore lower bound from box // since fir.coordinate_of indexes are zero based. Lowering takes care @@ -2764,26 +2787,31 @@ struct CoordinateOpConversion // types and non contiguous arrays. 
auto idxTy = lowerTy().indexType(); mlir::Value off = genConstantIndex(loc, idxTy, rewriter, 0); - for (unsigned index = i, lastIndex = i + arrTy.getDimension(); - index < lastIndex; ++index) { - mlir::Value stride = getStrideFromBox(loc, boxTyPair, operands[0], - index - i, rewriter); + unsigned arrayDim = arrTy.getDimension(); + for (unsigned dim = 0; dim < arrayDim && it != end; ++dim, ++it) { + mlir::Value stride = + getStrideFromBox(loc, boxTyPair, operands[0], dim, rewriter); auto sc = rewriter.create( - loc, idxTy, operands[index], stride, nsw); + loc, idxTy, operands[nextIndexValue + dim], stride, nsw); off = rewriter.create(loc, idxTy, sc, off, nsw); } + nextIndexValue += arrayDim; resultAddr = rewriter.create( loc, llvmPtrTy, byteTy, resultAddr, llvm::ArrayRef{off}); - i += arrTy.getDimension() - 1; cpnTy = arrTy.getEleTy(); } else if (auto recTy = mlir::dyn_cast(cpnTy)) { - mlir::Value nxtOpnd = operands[i]; - cpnTy = recTy.getType(getFieldNumber(recTy, nxtOpnd)); + auto intAttr = llvm::dyn_cast(*it); + if (!intAttr) + return mlir::emitError(loc, + "expected field name in fir.coordinate_of"); + int fieldIndex = intAttr.getInt(); + ++it; + cpnTy = recTy.getType(fieldIndex); auto llvmRecTy = lowerTy().convertType(recTy); resultAddr = rewriter.create( loc, llvmPtrTy, llvmRecTy, resultAddr, - llvm::ArrayRef{0, nxtOpnd}); + llvm::ArrayRef{0, fieldIndex}); } else { fir::emitFatalError(loc, "unexpected type in coordinate_of"); } @@ -2801,92 +2829,71 @@ struct CoordinateOpConversion // Component Type mlir::Type cpnTy = fir::dyn_cast_ptrOrBoxEleTy(baseObjectTy); - bool hasSubdimension = hasSubDimensions(cpnTy); - bool columnIsDeferred = !hasSubdimension; - - if (!supportedCoordinate(cpnTy, operands.drop_front(1))) - TODO(loc, "unsupported combination of coordinate operands"); - - const bool hasKnownShape = - arraysHaveKnownShape(cpnTy, operands.drop_front(1)); - - // If only the column is `?`, then we can simply place the column value in - // the 0-th GEP 
position. - if (auto arrTy = mlir::dyn_cast(cpnTy)) { - if (!hasKnownShape) { - const unsigned sz = arrTy.getDimension(); - if (arraysHaveKnownShape(arrTy.getEleTy(), - operands.drop_front(1 + sz))) { - fir::SequenceType::ShapeRef shape = arrTy.getShape(); - bool allConst = true; - for (unsigned i = 0; i < sz - 1; ++i) { - if (shape[i] < 0) { - allConst = false; - break; - } - } - if (allConst) - columnIsDeferred = true; - } - } - } + + const std::optional shapeAnalysis = + arraysHaveKnownShape(cpnTy, coor); + if (!shapeAnalysis) + return mlir::failure(); if (fir::hasDynamicSize(fir::unwrapSequenceType(cpnTy))) return mlir::emitError( loc, "fir.coordinate_of with a dynamic element size is unsupported"); - if (hasKnownShape || columnIsDeferred) { + if (shapeAnalysis->hasKnownShape || shapeAnalysis->columnIsDeferred) { llvm::SmallVector offs; - if (hasKnownShape && hasSubdimension) { + if (shapeAnalysis->hasKnownShape) { offs.push_back(0); } + // Else, only the column is `?` and we can simply place the column value + // in the 0-th GEP position. + std::optional dims; llvm::SmallVector arrIdx; - for (std::size_t i = 1, sz = operands.size(); i < sz; ++i) { - mlir::Value nxtOpnd = operands[i]; - - if (!cpnTy) - return mlir::emitError(loc, "invalid coordinate/check failed"); - - // check if the i-th coordinate relates to an array - if (dims) { - arrIdx.push_back(nxtOpnd); - int dimsLeft = *dims; - if (dimsLeft > 1) { - dims = dimsLeft - 1; - continue; - } - cpnTy = mlir::cast(cpnTy).getElementType(); - // append array range in reverse (FIR arrays are column-major) - offs.append(arrIdx.rbegin(), arrIdx.rend()); - arrIdx.clear(); - dims.reset(); + int nextIndexValue = 1; + for (auto index : coor.getIndices()) { + if (auto intAttr = llvm::dyn_cast(index)) { + // Addressing derived type component. 
+ auto recordType = llvm::dyn_cast(cpnTy); + if (!recordType) + return mlir::emitError( + loc, + "fir.coordinate base type is not consistent with operands"); + int fieldId = intAttr.getInt(); + cpnTy = recordType.getType(fieldId); + offs.push_back(fieldId); continue; } - if (auto arrTy = mlir::dyn_cast(cpnTy)) { - int d = arrTy.getDimension() - 1; - if (d > 0) { - dims = d; - arrIdx.push_back(nxtOpnd); - continue; + // Value index (addressing array, tuple, or complex part). + mlir::Value indexValue = operands[nextIndexValue++]; + if (auto tupTy = mlir::dyn_cast(cpnTy)) { + cpnTy = tupTy.getType(getConstantIntValue(indexValue)); + offs.push_back(indexValue); + } else { + if (!dims) { + if (auto arrayType = llvm::dyn_cast(cpnTy)) { + // Starting addressing array or array component. + dims = arrayType.getDimension(); + cpnTy = arrayType.getElementType(); + } + } + if (dims) { + arrIdx.push_back(indexValue); + if (--(*dims) == 0) { + // Append array range in reverse (FIR arrays are column-major). + offs.append(arrIdx.rbegin(), arrIdx.rend()); + arrIdx.clear(); + dims.reset(); + } + } else { + offs.push_back(indexValue); } - cpnTy = mlir::cast(cpnTy).getElementType(); - offs.push_back(nxtOpnd); - continue; } - - // check if the i-th coordinate relates to a field - if (auto recTy = mlir::dyn_cast(cpnTy)) - cpnTy = recTy.getType(getFieldNumber(recTy, nxtOpnd)); - else if (auto tupTy = mlir::dyn_cast(cpnTy)) - cpnTy = tupTy.getType(getConstantIntValue(nxtOpnd)); - else - cpnTy = nullptr; - - offs.push_back(nxtOpnd); } - if (dims) + // It is possible the fir.coordinate_of result is a sub-array, in which + // case there may be some "unfinished" array indices to reverse and push. 
+ if (!arrIdx.empty()) offs.append(arrIdx.rbegin(), arrIdx.rend()); + mlir::Value base = operands[0]; mlir::Value retval = genGEP(loc, llvmObjectTy, rewriter, base, offs); rewriter.replaceOp(coor, retval); diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 7e50622db08c9..7efb733eb565c 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -1460,9 +1460,89 @@ llvm::LogicalResult fir::ConvertOp::verify() { // CoordinateOp //===----------------------------------------------------------------------===// +void fir::CoordinateOp::build(mlir::OpBuilder &builder, + mlir::OperationState &result, + mlir::Type resultType, mlir::Value ref, + mlir::ValueRange coor) { + llvm::SmallVector fieldIndices; + llvm::SmallVector dynamicIndices; + bool anyField = false; + for (mlir::Value index : coor) { + if (auto field = index.getDefiningOp()) { + auto recTy = mlir::cast(field.getOnType()); + fieldIndices.push_back(recTy.getFieldIndex(field.getFieldId())); + anyField = true; + } else { + fieldIndices.push_back(fir::CoordinateOp::kDynamicIndex); + dynamicIndices.push_back(index); + } + } + auto typeAttr = mlir::TypeAttr::get(ref.getType()); + if (anyField) { + build(builder, result, resultType, ref, dynamicIndices, typeAttr, + builder.getDenseI32ArrayAttr(fieldIndices)); + } else { + build(builder, result, resultType, ref, dynamicIndices, typeAttr, nullptr); + } +} + +void fir::CoordinateOp::build(mlir::OpBuilder &builder, + mlir::OperationState &result, + mlir::Type resultType, mlir::Value ref, + llvm::ArrayRef coor) { + llvm::SmallVector fieldIndices; + llvm::SmallVector dynamicIndices; + bool anyField = false; + for (fir::IntOrValue index : coor) { + llvm::TypeSwitch(index) + .Case([&](mlir::IntegerAttr intAttr) { + fieldIndices.push_back(intAttr.getInt()); + anyField = true; + }) + .Case([&](mlir::Value value) { + dynamicIndices.push_back(value); + 
fieldIndices.push_back(fir::CoordinateOp::kDynamicIndex); + }); + } + auto typeAttr = mlir::TypeAttr::get(ref.getType()); + if (anyField) { + build(builder, result, resultType, ref, dynamicIndices, typeAttr, + builder.getDenseI32ArrayAttr(fieldIndices)); + } else { + build(builder, result, resultType, ref, dynamicIndices, typeAttr, nullptr); + } +} + void fir::CoordinateOp::print(mlir::OpAsmPrinter &p) { - p << ' ' << getRef() << ", " << getCoor(); - p.printOptionalAttrDict((*this)->getAttrs(), /*elideAttrs=*/{"baseType"}); + p << ' ' << getRef(); + if (!getFieldIndicesAttr()) { + p << ", " << getCoor(); + } else { + mlir::Type eleTy = fir::getFortranElementType(getRef().getType()); + for (auto index : getIndices()) { + p << ", "; + llvm::TypeSwitch(index) + .Case([&](mlir::IntegerAttr intAttr) { + if (auto recordType = llvm::dyn_cast(eleTy)) { + int fieldId = intAttr.getInt(); + if (fieldId < static_cast(recordType.getNumFields())) { + auto nameAndType = recordType.getTypeList()[fieldId]; + p << std::get(nameAndType); + eleTy = fir::getFortranElementType( + std::get(nameAndType)); + return; + } + } + // Invalid index, still print it so that invalid IR can be + // investigated. 
+ p << intAttr; + }) + .Case([&](mlir::Value value) { p << value; }); + } + } + p.printOptionalAttrDict( + (*this)->getAttrs(), + /*elideAttrs=*/{getBaseTypeAttrName(), getFieldIndicesAttrName()}); p << " : "; p.printFunctionalType(getOperandTypes(), (*this)->getResultTypes()); } @@ -1473,8 +1553,24 @@ mlir::ParseResult fir::CoordinateOp::parse(mlir::OpAsmParser &parser, if (parser.parseOperand(memref) || parser.parseComma()) return mlir::failure(); llvm::SmallVector coorOperands; - if (parser.parseOperandList(coorOperands)) - return mlir::failure(); + llvm::SmallVector> fieldNames; + llvm::SmallVector fieldIndices; + while (true) { + llvm::StringRef fieldName; + if (mlir::succeeded(parser.parseOptionalKeyword(&fieldName))) { + fieldNames.push_back({fieldName, static_cast(fieldIndices.size())}); + // Actual value will be computed later when base type has been parsed. + fieldIndices.push_back(0); + } else { + mlir::OpAsmParser::UnresolvedOperand index; + if (parser.parseOperand(index)) + return mlir::failure(); + fieldIndices.push_back(fir::CoordinateOp::kDynamicIndex); + coorOperands.push_back(index); + } + if (mlir::failed(parser.parseOptionalComma())) + break; + } llvm::SmallVector allOperands; allOperands.push_back(memref); allOperands.append(coorOperands.begin(), coorOperands.end()); @@ -1486,7 +1582,27 @@ mlir::ParseResult fir::CoordinateOp::parse(mlir::OpAsmParser &parser, result.operands) || parser.addTypesToList(funcTy.getResults(), result.types)) return mlir::failure(); - result.addAttribute("baseType", mlir::TypeAttr::get(funcTy.getInput(0))); + result.addAttribute(getBaseTypeAttrName(result.name), + mlir::TypeAttr::get(funcTy.getInput(0))); + if (!fieldNames.empty()) { + mlir::Type eleTy = fir::getFortranElementType(funcTy.getInput(0)); + for (auto [fieldName, operandPosition] : fieldNames) { + auto recTy = llvm::dyn_cast(eleTy); + if (!recTy) + return parser.emitError( + loc, "base must be a derived type when field name appears"); + unsigned fieldNum = 
recTy.getFieldIndex(fieldName); + if (fieldNum > recTy.getNumFields()) + return parser.emitError(loc) + << "field '" << fieldName + << "' is not a component or subcomponent of the base type"; + fieldIndices[operandPosition] = fieldNum; + eleTy = fir::getFortranElementType( + std::get(recTy.getTypeList()[fieldNum])); + } + result.addAttribute(getFieldIndicesAttrName(result.name), + parser.getBuilder().getDenseI32ArrayAttr(fieldIndices)); + } return mlir::success(); } @@ -1567,6 +1683,10 @@ llvm::LogicalResult fir::CoordinateOp::verify() { return mlir::success(); } +fir::CoordinateIndicesAdaptor fir::CoordinateOp::getIndices() { + return CoordinateIndicesAdaptor(getFieldIndicesAttr(), getCoor()); +} + //===----------------------------------------------------------------------===// // DispatchOp //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp index bf94166edc079..b0014a3aced6b 100644 --- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp @@ -15,6 +15,8 @@ #include "mlir/Transforms/DialectConversion.h" #include +#include +#include namespace flangomp { #define GEN_PASS_DEF_GENERICLOOPCONVERSIONPASS @@ -58,7 +60,7 @@ class GenericLoopConversionPattern if (teamsLoopCanBeParallelFor(loopOp)) rewriteToDistributeParallelDo(loopOp, rewriter); else - rewriteToDistrbute(loopOp, rewriter); + rewriteToDistribute(loopOp, rewriter); break; } @@ -77,9 +79,6 @@ class GenericLoopConversionPattern if (loopOp.getOrder()) return todo("order"); - if (!loopOp.getReductionVars().empty()) - return todo("reduction"); - return mlir::success(); } @@ -168,7 +167,7 @@ class GenericLoopConversionPattern case ClauseBindKind::Parallel: return rewriteToWsloop(loopOp, rewriter); case ClauseBindKind::Teams: - return rewriteToDistrbute(loopOp, rewriter); + return rewriteToDistribute(loopOp, 
rewriter); case ClauseBindKind::Thread: return rewriteToSimdLoop(loopOp, rewriter); } @@ -211,8 +210,9 @@ class GenericLoopConversionPattern loopOp, rewriter); } - void rewriteToDistrbute(mlir::omp::LoopOp loopOp, - mlir::ConversionPatternRewriter &rewriter) const { + void rewriteToDistribute(mlir::omp::LoopOp loopOp, + mlir::ConversionPatternRewriter &rewriter) const { + assert(loopOp.getReductionVars().empty()); rewriteToSingleWrapperOp(loopOp, rewriter); } @@ -246,6 +246,12 @@ class GenericLoopConversionPattern Fortran::common::openmp::EntryBlockArgs args; args.priv.vars = clauseOps.privateVars; + if constexpr (!std::is_same_v) { + populateReductionClauseOps(loopOp, clauseOps); + args.reduction.vars = clauseOps.reductionVars; + } + auto wrapperOp = rewriter.create(loopOp.getLoc(), clauseOps); mlir::Block *opBlock = genEntryBlock(rewriter, args, wrapperOp.getRegion()); @@ -275,8 +281,7 @@ class GenericLoopConversionPattern auto parallelOp = rewriter.create(loopOp.getLoc(), parallelClauseOps); - mlir::Block *parallelBlock = - genEntryBlock(rewriter, parallelArgs, parallelOp.getRegion()); + genEntryBlock(rewriter, parallelArgs, parallelOp.getRegion()); parallelOp.setComposite(true); rewriter.setInsertionPoint( rewriter.create(loopOp.getLoc())); @@ -288,20 +293,54 @@ class GenericLoopConversionPattern rewriter.createBlock(&distributeOp.getRegion()); mlir::omp::WsloopOperands wsloopClauseOps; + populateReductionClauseOps(loopOp, wsloopClauseOps); + Fortran::common::openmp::EntryBlockArgs wsloopArgs; + wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; + auto wsloopOp = rewriter.create(loopOp.getLoc(), wsloopClauseOps); wsloopOp.setComposite(true); - rewriter.createBlock(&wsloopOp.getRegion()); + genEntryBlock(rewriter, wsloopArgs, wsloopOp.getRegion()); mlir::IRMapping mapper; - mlir::Block &loopBlock = *loopOp.getRegion().begin(); - for (auto [loopOpArg, parallelOpArg] : llvm::zip_equal( - loopBlock.getArguments(), parallelBlock->getArguments())) + auto 
loopBlockInterface = + llvm::cast(*loopOp); + auto parallelBlockInterface = + llvm::cast(*parallelOp); + auto wsloopBlockInterface = + llvm::cast(*wsloopOp); + + for (auto [loopOpArg, parallelOpArg] : + llvm::zip_equal(loopBlockInterface.getPrivateBlockArgs(), + parallelBlockInterface.getPrivateBlockArgs())) mapper.map(loopOpArg, parallelOpArg); + for (auto [loopOpArg, wsloopOpArg] : + llvm::zip_equal(loopBlockInterface.getReductionBlockArgs(), + wsloopBlockInterface.getReductionBlockArgs())) + mapper.map(loopOpArg, wsloopOpArg); + rewriter.clone(*loopOp.begin(), mapper); } + + void + populateReductionClauseOps(mlir::omp::LoopOp loopOp, + mlir::omp::ReductionClauseOps &clauseOps) const { + clauseOps.reductionMod = loopOp.getReductionModAttr(); + clauseOps.reductionVars = loopOp.getReductionVars(); + + std::optional reductionSyms = loopOp.getReductionSyms(); + if (reductionSyms) + clauseOps.reductionSyms.assign(reductionSyms->begin(), + reductionSyms->end()); + + std::optional> reductionByref = + loopOp.getReductionByref(); + if (reductionByref) + clauseOps.reductionByref.assign(reductionByref->begin(), + reductionByref->end()); + } }; class GenericLoopConversionPass diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index beea7543e54b3..ab4dc582d5804 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -579,7 +579,7 @@ class MapInfoFinalizationPass if (!shouldMapField) continue; - int64_t fieldIdx = recordType.getFieldIndex(field); + int32_t fieldIdx = recordType.getFieldIndex(field); bool alreadyMapped = [&]() { if (op.getMembersIndexAttr()) for (auto indexList : op.getMembersIndexAttr()) { @@ -597,12 +597,11 @@ class MapInfoFinalizationPass continue; builder.setInsertionPoint(op); - mlir::Value fieldIdxVal = builder.createIntegerConstant( - op.getLoc(), mlir::IndexType::get(builder.getContext()), - fieldIdx); + fir::IntOrValue 
idxConst = + mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx); auto fieldCoord = builder.create( op.getLoc(), builder.getRefType(memTy), op.getVarPtr(), - fieldIdxVal); + llvm::SmallVector{idxConst}); fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( builder, fieldCoord, /*isOptional=*/false, op.getLoc()); diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index e396ece303103..4042d7504396c 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -535,9 +535,6 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, if (actualLastSymbol) { actualLastSymbol = &ResolveAssociations(*actualLastSymbol); } - const ObjectEntityDetails *actualLastObject{actualLastSymbol - ? actualLastSymbol->detailsIf() - : nullptr}; int actualRank{actualType.Rank()}; if (dummy.type.attrs().test( characteristics::TypeAndShape::Attr::AssumedShape)) { @@ -689,6 +686,9 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, } } } + const ObjectEntityDetails *actualLastObject{actualLastSymbol + ? actualLastSymbol->detailsIf() + : nullptr}; if (actualLastObject && actualLastObject->IsCoarray() && dummy.attrs.test(characteristics::DummyDataObject::Attr::Allocatable) && dummy.intent == common::Intent::Out && @@ -793,21 +793,21 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, } } else if (actualIsNull) { if (dummyIsOptional) { - } else if (dummy.intent == common::Intent::In) { - // Extension (Intel, NAG, XLF): a NULL() pointer is an acceptable - // actual argument for an INTENT(IN) allocatable dummy, and it - // is treated as an unassociated allocatable. 
- if (context.ShouldWarn( - common::LanguageFeature::NullActualForAllocatable)) { - messages.Say(common::LanguageFeature::NullActualForAllocatable, - "Allocatable %s is associated with a null pointer"_port_en_US, - dummyName); - } - } else { + } else if (dummy.intent == common::Intent::Default && + context.ShouldWarn( + common::UsageWarning::NullActualForDefaultIntentAllocatable)) { messages.Say( - "A null pointer may not be associated with allocatable %s without INTENT(IN)"_err_en_US, + "A null pointer should not be associated with allocatable %s without INTENT(IN)"_warn_en_US, + dummyName); + } else if (dummy.intent == common::Intent::In && + context.ShouldWarn( + common::LanguageFeature::NullActualForAllocatable)) { + messages.Say(common::LanguageFeature::NullActualForAllocatable, + "Allocatable %s is associated with a null pointer"_port_en_US, dummyName); } + // INTENT(OUT) and INTENT(IN OUT) cases are caught elsewhere as being + // undefinable actual arguments. } else { messages.Say( "ALLOCATABLE %s must be associated with an ALLOCATABLE actual argument"_err_en_US, @@ -1049,8 +1049,8 @@ static void CheckProcedureArg(evaluate::ActualArgument &arg, SemanticsContext &context, bool ignoreImplicitVsExplicit) { evaluate::FoldingContext &foldingContext{context.foldingContext()}; parser::ContextualMessages &messages{foldingContext.messages()}; - auto restorer{ - messages.SetLocation(arg.sourceLocation().value_or(messages.at()))}; + parser::CharBlock location{arg.sourceLocation().value_or(messages.at())}; + auto restorer{messages.SetLocation(location)}; const characteristics::Procedure &interface { dummy.procedure.value() }; if (const auto *expr{arg.UnwrapExpr()}) { bool dummyIsPointer{ @@ -1175,22 +1175,30 @@ static void CheckProcedureArg(evaluate::ActualArgument &arg, dummyName); } } - if (dummyIsPointer && dummy.intent != common::Intent::In) { - const Symbol *last{GetLastSymbol(*expr)}; - if (last && IsProcedurePointer(*last)) { - if (dummy.intent != 
common::Intent::Default && - IsIntentIn(last->GetUltimate())) { // 19.6.8 - messages.Say( - "Actual argument associated with procedure pointer %s may not be INTENT(IN)"_err_en_US, - dummyName); - } - } else if (!(dummy.intent == common::Intent::Default && - IsNullProcedurePointer(*expr))) { - // 15.5.2.9(5) -- dummy procedure POINTER - // Interface compatibility has already been checked above + if (dummyIsPointer) { + if (dummy.intent == common::Intent::In) { + // need not be definable, can be a target + } else if (!IsProcedurePointer(*expr)) { messages.Say( - "Actual argument associated with procedure pointer %s must be a pointer unless INTENT(IN)"_err_en_US, + "Actual argument associated with procedure pointer %s is not a procedure pointer"_err_en_US, dummyName); + } else if (dummy.intent == common::Intent::Default) { + // ok, needs to be definable only if defined at run time + } else { + DefinabilityFlags flags{DefinabilityFlag::PointerDefinition}; + if (dummy.intent != common::Intent::Out) { + flags.set(DefinabilityFlag::DoNotNoteDefinition); + } + if (auto whyNot{WhyNotDefinable( + location, context.FindScope(location), flags, *expr)}) { + if (auto *msg{messages.Say( + "Actual argument associated with INTENT(%s) procedure pointer %s is not definable"_err_en_US, + dummy.intent == common::Intent::Out ? "OUT" : "IN OUT", + dummyName)}) { + msg->Attach( + std::move(whyNot->set_severity(parser::Severity::Because))); + } + } } } } else { @@ -1292,19 +1300,24 @@ static void CheckExplicitInterfaceArg(evaluate::ActualArgument &arg, } else if (object.attrs.test(characteristics::DummyDataObject:: Attr::Allocatable) && evaluate::IsNullPointer(*expr)) { - if (object.intent == common::Intent::In) { - // Extension (Intel, NAG, XLF); see CheckExplicitDataArg. 
- if (context.ShouldWarn(common::LanguageFeature:: - NullActualForAllocatable)) { - messages.Say( - common::LanguageFeature::NullActualForAllocatable, - "Allocatable %s is associated with NULL()"_port_en_US, - dummyName); - } - } else { + if (object.intent == common::Intent::Out || + object.intent == common::Intent::InOut) { messages.Say( - "NULL() actual argument '%s' may not be associated with allocatable %s without INTENT(IN)"_err_en_US, + "NULL() actual argument '%s' may not be associated with allocatable dummy argument %s that is INTENT(OUT) or INTENT(IN OUT)"_err_en_US, + expr->AsFortran(), dummyName); + } else if (object.intent == common::Intent::Default && + context.ShouldWarn(common::UsageWarning:: + NullActualForDefaultIntentAllocatable)) { + messages.Say(common::UsageWarning:: + NullActualForDefaultIntentAllocatable, + "NULL() actual argument '%s' should not be associated with allocatable dummy argument %s without INTENT(IN)"_warn_en_US, expr->AsFortran(), dummyName); + } else if (context.ShouldWarn(common::LanguageFeature:: + NullActualForAllocatable)) { + messages.Say( + common::LanguageFeature::NullActualForAllocatable, + "Allocatable %s is associated with %s"_port_en_US, + dummyName, expr->AsFortran()); } } else { messages.Say( @@ -1478,6 +1491,8 @@ static void CheckAssociated(evaluate::ActualArguments &arguments, } if (const auto &targetArg{arguments[1]}) { // The standard requires that the TARGET= argument, when present, + // be type compatible with the POINTER= for a data pointer. In + // the case of procedure pointers, the standard requires that it // be a valid RHS for a pointer assignment that has the POINTER= // argument as its LHS. 
Some popular compilers misinterpret this // requirement more strongly than necessary, and actually validate @@ -1584,7 +1599,8 @@ static void CheckAssociated(evaluate::ActualArguments &arguments, } if (const auto pointerType{pointerArg->GetType()}) { if (const auto targetType{targetArg->GetType()}) { - ok = pointerType->IsTkCompatibleWith(*targetType); + ok = pointerType->IsTkCompatibleWith(*targetType) || + targetType->IsTkCompatibleWith(*pointerType); } } } else { diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index bf4dc16a15b4a..c30c15a290b84 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -33,6 +33,8 @@ using characteristics::DummyProcedure; using characteristics::FunctionResult; using characteristics::Procedure; +class DistinguishabilityHelper; + class CheckHelper { public: explicit CheckHelper(SemanticsContext &c) : context_{c} {} @@ -89,6 +91,8 @@ class CheckHelper { const SourceName &, const Symbol &, const Procedure &, std::size_t); bool CheckDefinedAssignment(const Symbol &, const Procedure &); bool CheckDefinedAssignmentArg(const Symbol &, const DummyArgument &, int); + void CollectSpecifics( + DistinguishabilityHelper &, const Symbol &, const GenericDetails &); void CheckSpecifics(const Symbol &, const GenericDetails &); void CheckEquivalenceSet(const EquivalenceSet &); void CheckEquivalenceObject(const EquivalenceObject &); @@ -161,8 +165,8 @@ class CheckHelper { void CheckDioDummyIsDefaultInteger(const Symbol &, const Symbol &); void CheckDioDummyIsScalar(const Symbol &, const Symbol &); void CheckDioDummyAttrs(const Symbol &, const Symbol &, Attr); - void CheckDioDtvArg( - const Symbol &, const Symbol *, common::DefinedIo, const Symbol &); + void CheckDioDtvArg(const Symbol &proc, const Symbol &subp, const Symbol *arg, + common::DefinedIo, const Symbol &generic); void CheckGenericVsIntrinsic(const Symbol &, const GenericDetails &); void 
CheckDefaultIntegerArg(const Symbol &, const Symbol *, Attr); void CheckDioAssumedLenCharacterArg( @@ -683,20 +687,10 @@ void CheckHelper::CheckObjectEntity( const DeclTypeSpec *type{details.type()}; const DerivedTypeSpec *derived{type ? type->AsDerived() : nullptr}; bool isComponent{symbol.owner().IsDerivedType()}; - if (details.coshape().empty()) { // not a coarray - if (!isComponent && !IsPointer(symbol) && derived) { - if (IsEventTypeOrLockType(derived)) { - messages_.Say( - "Variable '%s' with EVENT_TYPE or LOCK_TYPE must be a coarray"_err_en_US, - symbol.name()); - } else if (auto component{FindEventOrLockPotentialComponent( - *derived, /*ignoreCoarrays=*/true)}) { - messages_.Say( - "Variable '%s' with EVENT_TYPE or LOCK_TYPE potential component '%s' must be a coarray"_err_en_US, - symbol.name(), component.BuildResultDesignatorName()); - } - } - } else { // it's a coarray + const Symbol *commonBlock{FindCommonBlockContaining(symbol)}; + bool isLocalVariable{!commonBlock && !isComponent && !details.isDummy() && + symbol.owner().kind() != Scope::Kind::OtherConstruct}; + if (int corank{evaluate::GetCorank(symbol)}; corank > 0) { // it's a coarray bool isDeferredCoshape{details.coshape().CanBeDeferredShape()}; if (IsAllocatable(symbol)) { if (!isDeferredCoshape) { // C827 @@ -726,6 +720,46 @@ void CheckHelper::CheckObjectEntity( messages_.Say("Coarray '%s' may not be an assumed-rank array"_err_en_US, symbol.name()); } + if (IsNamedConstant(symbol)) { + messages_.Say( + "Coarray '%s' may not be a named constant"_err_en_US, symbol.name()); + } + if (IsFunctionResult(symbol)) { + messages_.Say("Function result may not be a coarray"_err_en_US); + } else if (commonBlock) { + messages_.Say("Coarray '%s' may not be in COMMON block '/%s/'"_err_en_US, + symbol.name(), commonBlock->name()); + } else if (isLocalVariable && !IsAllocatableOrPointer(symbol) && + !IsSaved(symbol)) { + messages_.Say("Local coarray must have the SAVE attribute"_err_en_US); + } + for (int j{0}; j 
< corank; ++j) { + if (auto lcbv{evaluate::ToInt64(evaluate::Fold( + context().foldingContext(), evaluate::GetLCOBOUND(symbol, j)))}) { + if (auto ucbv{ + evaluate::ToInt64(evaluate::Fold(context().foldingContext(), + evaluate::GetUCOBOUND(symbol, j)))}) { + if (ucbv < lcbv) { + messages_.Say( + "Cobounds %jd:%jd of codimension %d produce an empty coarray"_err_en_US, + std::intmax_t{*lcbv}, std::intmax_t{*ucbv}, j + 1); + } + } + } + } + } else { // not a coarray + if (!isComponent && !IsPointer(symbol) && derived) { + if (IsEventTypeOrLockType(derived)) { + messages_.Say( + "Variable '%s' with EVENT_TYPE or LOCK_TYPE must be a coarray"_err_en_US, + symbol.name()); + } else if (auto component{FindEventOrLockPotentialComponent( + *derived, /*ignoreCoarrays=*/true)}) { + messages_.Say( + "Variable '%s' with EVENT_TYPE or LOCK_TYPE potential component '%s' must be a coarray"_err_en_US, + symbol.name(), component.BuildResultDesignatorName()); + } + } } if (details.isDummy()) { if (IsIntentOut(symbol)) { @@ -926,6 +960,42 @@ void CheckHelper::CheckObjectEntity( symbol.name()); } + if (derived) { + bool isUnsavedLocal{ + isLocalVariable && !IsAllocatable(symbol) && !IsSaved(symbol)}; + if (IsFunctionResult(symbol) || IsPointer(symbol) || + evaluate::IsCoarray(symbol) || isUnsavedLocal) { + if (auto badPotential{FindCoarrayPotentialComponent(*derived)}) { + if (IsFunctionResult(symbol)) { // F'2023 C825 + SayWithDeclaration(*badPotential, + "Function result '%s' may not have a coarray potential component '%s'"_err_en_US, + symbol.name(), badPotential.BuildResultDesignatorName()); + } else if (IsPointer(symbol)) { // F'2023 C825 + SayWithDeclaration(*badPotential, + "Pointer '%s' may not have a coarray potential component '%s'"_err_en_US, + symbol.name(), badPotential.BuildResultDesignatorName()); + } else if (evaluate::IsCoarray(symbol)) { // F'2023 C825 + SayWithDeclaration(*badPotential, + "Coarray '%s' may not have a coarray potential component '%s'"_err_en_US, + 
symbol.name(), badPotential.BuildResultDesignatorName()); + } else if (isUnsavedLocal) { // F'2023 C826 + SayWithDeclaration(*badPotential, + "Local variable '%s' without the SAVE attribute may not have a coarray potential subobject component '%s'"_err_en_US, + symbol.name(), badPotential.BuildResultDesignatorName()); + } else { + DIE("caught unexpected bad coarray potential component"); + } + } + } else if (isComponent && (IsAllocatable(symbol) || symbol.Rank() > 0)) { + if (auto badUltimate{FindCoarrayUltimateComponent(*derived)}) { + // TODO: still an error in F'2023? + SayWithDeclaration(*badUltimate, + "Allocatable or array component '%s' may not have a coarray ultimate component '%s'"_err_en_US, + symbol.name(), badUltimate.BuildResultDesignatorName()); + } + } + } + // Check CUDA attributes and special circumstances of being in device // subprograms const Scope &progUnit{GetProgramUnitContaining(symbol)}; @@ -1496,6 +1566,14 @@ void CheckHelper::CheckSubprogram( messages_.Say(details.result().name(), "A function interface may not declare an assumed-length CHARACTER(*) result"_err_en_US); } + if (symbol.attrs().test(Attr::ABSTRACT) && + (symbol.name() == "integer" || symbol.name() == "unsigned" || + symbol.name() == "real" || symbol.name() == "complex" || + symbol.name() == "character" || + symbol.name() == "logical")) { // F'2023 C1503 + messages_.Say( + "An ABSTRACT interface may not have the same name as an intrinsic type"_err_en_US); + } } CheckExternal(symbol); CheckModuleProcedureDef(symbol); @@ -1857,10 +1935,9 @@ void CheckHelper::CheckGeneric( } // Check that the specifics of this generic are distinguishable from each other -void CheckHelper::CheckSpecifics( +void CheckHelper::CollectSpecifics(DistinguishabilityHelper &helper, const Symbol &generic, const GenericDetails &details) { GenericKind kind{details.kind()}; - DistinguishabilityHelper helper{context_}; for (const Symbol &specific : details.specificProcs()) { if 
(specific.attrs().test(Attr::ABSTRACT)) { if (auto *msg{messages_.Say(generic.name(), @@ -1915,6 +1992,22 @@ void CheckHelper::CheckSpecifics( } } } + if (const Scope * parent{generic.owner().GetDerivedTypeParent()}) { + if (const Symbol * inherited{parent->FindComponent(generic.name())}) { + if (IsAccessible(*inherited, generic.owner().parent())) { + if (const auto *details{inherited->detailsIf()}) { + // Include specifics of inherited generic of the same name, too + CollectSpecifics(helper, *inherited, *details); + } + } + } + } +} + +void CheckHelper::CheckSpecifics( + const Symbol &generic, const GenericDetails &details) { + DistinguishabilityHelper helper{context_}; + CollectSpecifics(helper, generic, details); helper.Check(generic.owner()); } @@ -3161,10 +3254,6 @@ parser::Messages CheckHelper::WhyNotInteroperableFunctionResult( msgs.Say(symbol.name(), "Interoperable function result must be scalar"_err_en_US); } - if (symbol.Corank()) { - msgs.Say(symbol.name(), - "Interoperable function result may not be a coarray"_err_en_US); - } return msgs; } @@ -3336,13 +3425,20 @@ void CheckHelper::CheckAlreadySeenDefinedIo(const DerivedTypeSpec &derivedType, return; } if (const Scope * dtScope{derivedType.scope()}) { - if (auto iter{dtScope->find(generic.name())}; iter != dtScope->end()) { + if (auto iter{dtScope->find(generic.name())}; iter != dtScope->end() && + IsAccessible(*iter->second, generic.owner())) { for (auto specRef : iter->second->get().specificProcs()) { - const Symbol &specific{specRef->get().symbol()}; - if (specific == proc) { // unambiguous, accept - continue; + const Symbol *specific{&specRef->get().symbol()}; + if (specific == &proc) { + continue; // unambiguous, accept + } + if (const auto *peDetails{specific->detailsIf()}) { + specific = peDetails->procInterface(); + if (!specific) { + continue; + } } - if (const auto *specDT{GetDtvArgDerivedType(specific)}; + if (const auto *specDT{GetDtvArgDerivedType(*specific)}; specDT && 
evaluate::AreSameDerivedType(derivedType, *specDT)) { SayWithDeclaration(*specRef, proc.name(), "Derived type '%s' has conflicting type-bound input/output procedure '%s'"_err_en_US, @@ -3354,11 +3450,11 @@ void CheckHelper::CheckAlreadySeenDefinedIo(const DerivedTypeSpec &derivedType, } } -void CheckHelper::CheckDioDummyIsDerived(const Symbol &subp, const Symbol &arg, +void CheckHelper::CheckDioDummyIsDerived(const Symbol &proc, const Symbol &arg, common::DefinedIo ioKind, const Symbol &generic) { if (const DeclTypeSpec *type{arg.GetType()}) { if (const DerivedTypeSpec *derivedType{type->AsDerived()}) { - CheckAlreadySeenDefinedIo(*derivedType, ioKind, subp, generic); + CheckAlreadySeenDefinedIo(*derivedType, ioKind, proc, generic); bool isPolymorphic{type->IsPolymorphic()}; if (isPolymorphic != IsExtensibleType(derivedType)) { messages_.Say(arg.name(), @@ -3368,8 +3464,7 @@ void CheckHelper::CheckDioDummyIsDerived(const Symbol &subp, const Symbol &arg, } } else { messages_.Say(arg.name(), - "Dummy argument '%s' of a defined input/output procedure must have a" - " derived type"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must have a derived type"_err_en_US, arg.name()); } } @@ -3385,25 +3480,23 @@ void CheckHelper::CheckDioDummyIsDefaultInteger( } } messages_.Say(arg.name(), - "Dummy argument '%s' of a defined input/output procedure" - " must be an INTEGER of default KIND"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must be an INTEGER of default KIND"_err_en_US, arg.name()); } void CheckHelper::CheckDioDummyIsScalar(const Symbol &subp, const Symbol &arg) { if (arg.Rank() > 0 || arg.Corank() > 0) { messages_.Say(arg.name(), - "Dummy argument '%s' of a defined input/output procedure" - " must be a scalar"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must be a scalar"_err_en_US, arg.name()); } } -void CheckHelper::CheckDioDtvArg(const Symbol &subp, const Symbol *arg, - common::DefinedIo 
ioKind, const Symbol &generic) { +void CheckHelper::CheckDioDtvArg(const Symbol &proc, const Symbol &subp, + const Symbol *arg, common::DefinedIo ioKind, const Symbol &generic) { // Dtv argument looks like: dtv-type-spec, INTENT(INOUT) :: dtv if (CheckDioDummyIsData(subp, arg, 0)) { - CheckDioDummyIsDerived(subp, *arg, ioKind, generic); + CheckDioDummyIsDerived(proc, *arg, ioKind, generic); CheckDioDummyAttrs(subp, *arg, ioKind == common::DefinedIo::ReadFormatted || ioKind == common::DefinedIo::ReadUnformatted @@ -3471,8 +3564,7 @@ void CheckHelper::CheckDioAssumedLenCharacterArg(const Symbol &subp, context_.defaultKinds().GetDefaultKind( TypeCategory::Character))) { messages_.Say(arg->name(), - "Dummy argument '%s' of a defined input/output procedure" - " must be assumed-length CHARACTER of default kind"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must be assumed-length CHARACTER of default kind"_err_en_US, arg->name()); } } @@ -3485,10 +3577,9 @@ void CheckHelper::CheckDioVlistArg( CheckDioDummyIsDefaultInteger(subp, *arg); CheckDioDummyAttrs(subp, *arg, Attr::INTENT_IN); const auto *objectDetails{arg->detailsIf()}; - if (!objectDetails || !objectDetails->shape().CanBeDeferredShape()) { + if (!objectDetails || !objectDetails->shape().CanBeAssumedShape()) { messages_.Say(arg->name(), - "Dummy argument '%s' of a defined input/output procedure must be" - " deferred shape"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must be assumed shape"_err_en_US, arg->name()); } } @@ -3503,8 +3594,7 @@ void CheckHelper::CheckDioArgCount( : 4)}; if (argCount != requiredArgCount) { SayWithDeclaration(subp, - "Defined input/output procedure '%s' must have" - " %d dummy arguments rather than %d"_err_en_US, + "Defined input/output procedure '%s' must have %d dummy arguments rather than %d"_err_en_US, subp.name(), requiredArgCount, argCount); context_.SetError(subp); } @@ -3516,15 +3606,13 @@ void CheckHelper::CheckDioDummyAttrs( Attrs 
attrs{arg.attrs()}; if (!attrs.test(goodIntent)) { messages_.Say(arg.name(), - "Dummy argument '%s' of a defined input/output procedure" - " must have intent '%s'"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must have intent '%s'"_err_en_US, arg.name(), AttrToString(goodIntent)); } attrs = attrs - Attr::INTENT_IN - Attr::INTENT_OUT - Attr::INTENT_INOUT; if (!attrs.empty()) { messages_.Say(arg.name(), - "Dummy argument '%s' of a defined input/output procedure may not have" - " any attributes"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure may not have any attributes"_err_en_US, arg.name()); } } @@ -3535,57 +3623,64 @@ void CheckHelper::CheckDefinedIoProc(const Symbol &symbol, for (auto ref : details.specificProcs()) { const Symbol &ultimate{ref->GetUltimate()}; const auto *binding{ultimate.detailsIf()}; - const Symbol &specific{*(binding ? &binding->symbol() : &ultimate)}; if (ultimate.attrs().test(Attr::NOPASS)) { // C774 - messages_.Say("Defined input/output procedure '%s' may not have NOPASS " - "attribute"_err_en_US, + messages_.Say( + "Defined input/output procedure '%s' may not have NOPASS attribute"_err_en_US, ultimate.name()); context_.SetError(ultimate); } - if (const auto *subpDetails{specific.detailsIf()}) { + const Symbol *specificProc{binding ? 
&binding->symbol() : &ultimate}; + const Symbol *specificSubp{specificProc}; + if (const auto *peDetails{specificSubp->detailsIf()}) { + specificSubp = peDetails->procInterface(); + if (!specificSubp) { + continue; + } + } + if (const auto *subpDetails{specificSubp->detailsIf()}) { const std::vector &dummyArgs{subpDetails->dummyArgs()}; - CheckDioArgCount(specific, ioKind, dummyArgs.size()); + CheckDioArgCount(*specificSubp, ioKind, dummyArgs.size()); int argCount{0}; for (auto *arg : dummyArgs) { switch (argCount++) { case 0: // dtv-type-spec, INTENT(INOUT) :: dtv - CheckDioDtvArg(specific, arg, ioKind, symbol); + CheckDioDtvArg(*specificProc, *specificSubp, arg, ioKind, symbol); break; case 1: // INTEGER, INTENT(IN) :: unit - CheckDefaultIntegerArg(specific, arg, Attr::INTENT_IN); + CheckDefaultIntegerArg(*specificSubp, arg, Attr::INTENT_IN); break; case 2: if (ioKind == common::DefinedIo::ReadFormatted || ioKind == common::DefinedIo::WriteFormatted) { // CHARACTER (LEN=*), INTENT(IN) :: iotype CheckDioAssumedLenCharacterArg( - specific, arg, argCount, Attr::INTENT_IN); + *specificSubp, arg, argCount, Attr::INTENT_IN); } else { // INTEGER, INTENT(OUT) :: iostat - CheckDefaultIntegerArg(specific, arg, Attr::INTENT_OUT); + CheckDefaultIntegerArg(*specificSubp, arg, Attr::INTENT_OUT); } break; case 3: if (ioKind == common::DefinedIo::ReadFormatted || ioKind == common::DefinedIo::WriteFormatted) { // INTEGER, INTENT(IN) :: v_list(:) - CheckDioVlistArg(specific, arg, argCount); + CheckDioVlistArg(*specificSubp, arg, argCount); } else { // CHARACTER (LEN=*), INTENT(INOUT) :: iomsg CheckDioAssumedLenCharacterArg( - specific, arg, argCount, Attr::INTENT_INOUT); + *specificSubp, arg, argCount, Attr::INTENT_INOUT); } break; case 4: // INTEGER, INTENT(OUT) :: iostat - CheckDefaultIntegerArg(specific, arg, Attr::INTENT_OUT); + CheckDefaultIntegerArg(*specificSubp, arg, Attr::INTENT_OUT); break; case 5: // CHARACTER (LEN=*), INTENT(INOUT) :: iomsg 
CheckDioAssumedLenCharacterArg( - specific, arg, argCount, Attr::INTENT_INOUT); + *specificSubp, arg, argCount, Attr::INTENT_INOUT); break; default:; } @@ -3884,10 +3979,11 @@ evaluate::Shape SubprogramMatchHelper::FoldShape(const evaluate::Shape &shape) { } void DistinguishabilityHelper::Add(const Symbol &generic, GenericKind kind, - const Symbol &ultimateSpecific, const Procedure &procedure) { - if (!context_.HasError(ultimateSpecific)) { + const Symbol &specific, const Procedure &procedure) { + const Symbol &ultimate{specific.GetUltimate()}; + if (!context_.HasError(ultimate)) { nameToSpecifics_[generic.name()].emplace( - &ultimateSpecific, ProcedureInfo{kind, procedure}); + &ultimate, ProcedureInfo{kind, procedure}); } } @@ -3902,6 +3998,18 @@ void DistinguishabilityHelper::Check(const Scope &scope) { const auto &[ultimate, procInfo]{*iter1}; const auto &[kind, proc]{procInfo}; for (auto iter2{iter1}; ++iter2 != info.end();) { + if (&*ultimate == &*iter2->first) { + continue; // ok, actually the same procedure + } else if (const auto *binding1{ + ultimate->detailsIf()}) { + if (const auto *binding2{ + iter2->first->detailsIf()}) { + if (&binding1->symbol().GetUltimate() == + &binding2->symbol().GetUltimate()) { + continue; // ok, bindings resolve identically + } + } + } auto distinguishable{kind.IsName() ? evaluate::characteristics::Distinguishable : evaluate::characteristics::DistinguishableOpOrAssign}; diff --git a/flang/lib/Semantics/check-do-forall.cpp b/flang/lib/Semantics/check-do-forall.cpp index 84e6b6455cc61..cc1d4bf58745a 100644 --- a/flang/lib/Semantics/check-do-forall.cpp +++ b/flang/lib/Semantics/check-do-forall.cpp @@ -154,7 +154,8 @@ class DoConcurrentBodyEnforce { // of its components? 
static bool MightDeallocatePolymorphic(const Symbol &original, const std::function &WillDeallocate) { - const Symbol &symbol{ResolveAssociations(original)}; + const Symbol &symbol{ + ResolveAssociations(original, /*stopAtTypeGuard=*/true)}; // Check the entity itself, no coarray exception here if (IsPolymorphicAllocatable(symbol)) { return true; @@ -182,11 +183,10 @@ class DoConcurrentBodyEnforce { impure.name(), reason); } - void SayDeallocateOfPolymorph( + void SayDeallocateOfPolymorphic( parser::CharBlock location, const Symbol &entity, const char *reason) { context_.SayWithDecl(entity, location, - "Deallocation of a polymorphic entity caused by %s" - " not allowed in DO CONCURRENT"_err_en_US, + "Deallocation of a polymorphic entity caused by %s not allowed in DO CONCURRENT"_err_en_US, reason); } @@ -206,7 +206,7 @@ class DoConcurrentBodyEnforce { const Symbol &entity{*pair.second}; if (IsAllocatable(entity) && !IsSaved(entity) && MightDeallocatePolymorphic(entity, DeallocateAll)) { - SayDeallocateOfPolymorph(endBlockStmt.source, entity, reason); + SayDeallocateOfPolymorphic(endBlockStmt.source, entity, reason); } if (const Symbol * impure{HasImpureFinal(entity)}) { SayDeallocateWithImpureFinal(entity, reason, *impure); @@ -222,7 +222,7 @@ class DoConcurrentBodyEnforce { if (const Symbol * entity{GetLastName(variable).symbol}) { const char *reason{"assignment"}; if (MightDeallocatePolymorphic(*entity, DeallocateNonCoarray)) { - SayDeallocateOfPolymorph(variable.GetSource(), *entity, reason); + SayDeallocateOfPolymorphic(variable.GetSource(), *entity, reason); } if (const auto *assignment{GetAssignment(stmt)}) { const auto &lhs{assignment->lhs}; @@ -257,7 +257,7 @@ class DoConcurrentBodyEnforce { const DeclTypeSpec *entityType{entity.GetType()}; if ((entityType && entityType->IsPolymorphic()) || // POINTER case MightDeallocatePolymorphic(entity, DeallocateAll)) { - SayDeallocateOfPolymorph( + SayDeallocateOfPolymorphic( currentStatementSourcePosition_, entity, 
reason); } if (const Symbol * impure{HasImpureFinal(entity)}) { diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 6949e5693d08f..3efdfb3fa49b8 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -78,8 +78,10 @@ static std::optional AnalyzeTypeSpec( const semantics::CharacterTypeSpec &cts{ typeSpec->characterTypeSpec()}; const semantics::ParamValue &len{cts.length()}; - // N.B. CHARACTER(LEN=*) is allowed in type-specs in ALLOCATE() & - // type guards, but not in array constructors. + if (len.isAssumed() || len.isDeferred()) { + context.messages().Say( + "A length specifier of '*' or ':' may not appear in the type of an array constructor"_err_en_US); + } DynamicTypeWithLength type{DynamicType{kind, len}}; if (auto lenExpr{type.LEN()}) { type.length = Fold(context, @@ -3289,7 +3291,7 @@ const Assignment *ExpressionAnalyzer::Analyze(const parser::AssignmentStmt &x) { dyType && dyType->IsPolymorphic()) { // 10.2.1.2p1(1) const Symbol *lastWhole0{UnwrapWholeSymbolOrComponentDataRef(lhs)}; const Symbol *lastWhole{ - lastWhole0 ? &lastWhole0->GetUltimate() : nullptr}; + lastWhole0 ? 
&ResolveAssociations(*lastWhole0) : nullptr}; if (!lastWhole || !IsAllocatable(*lastWhole)) { Say("Left-hand side of assignment may not be polymorphic unless assignment is to an entire allocatable"_err_en_US); } else if (evaluate::IsCoarray(*lastWhole)) { diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 82c43d96bea44..1dfd9c35b3f43 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -1546,6 +1546,10 @@ Scope *ModFileReader::Read(SourceName name, std::optional isIntrinsic, Scope &hermeticScope{topScope.MakeScope(Scope::Kind::Global)}; context_.set_currentHermeticModuleFileScope(&hermeticScope); ResolveNames(context_, hermeticModules, hermeticScope); + for (auto &[_, ref] : hermeticScope) { + CHECK(ref->has()); + ref->set(Symbol::Flag::ModFile); + } } GetModuleDependences(context_.moduleDependences(), sourceFile->content()); ResolveNames(context_, parseTree, topScope); diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 17a6665dfb6a5..1514c01a49528 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -6127,32 +6127,6 @@ void DeclarationVisitor::Post(const parser::ComponentDecl &x) { "POINTER or ALLOCATABLE"_err_en_US); } } - // TODO: This would be more appropriate in CheckDerivedType() - if (auto it{FindCoarrayUltimateComponent(*derived)}) { // C748 - std::string ultimateName{it.BuildResultDesignatorName()}; - // Strip off the leading "%" - if (ultimateName.length() > 1) { - ultimateName.erase(0, 1); - if (attrs.HasAny({Attr::POINTER, Attr::ALLOCATABLE})) { - evaluate::AttachDeclaration( - Say(name.source, - "A component with a POINTER or ALLOCATABLE attribute may " - "not " - "be of a type with a coarray ultimate component (named " - "'%s')"_err_en_US, - ultimateName), - derived->typeSymbol()); - } - if (!arraySpec().empty() || !coarraySpec().empty()) { - evaluate::AttachDeclaration( - Say(name.source, - 
"An array or coarray component may not be of a type with a " - "coarray ultimate component (named '%s')"_err_en_US, - ultimateName), - derived->typeSymbol()); - } - } - } } } if (OkToAddComponent(name)) { @@ -7294,17 +7268,20 @@ bool DeclarationVisitor::OkToAddComponent( std::optional msg; std::optional warning; if (context().HasError(*prev)) { // don't pile on - } else if (extends) { - msg = "Type cannot be extended as it has a component named" - " '%s'"_err_en_US; } else if (CheckAccessibleSymbol(currScope(), *prev)) { // inaccessible component -- redeclaration is ok - if (context().ShouldWarn( - common::UsageWarning::RedeclaredInaccessibleComponent)) { + if (extends) { + // The parent type has a component of same name, but it remains + // extensible outside its module since that component is PRIVATE. + } else if (context().ShouldWarn( + common::UsageWarning::RedeclaredInaccessibleComponent)) { msg = "Component '%s' is inaccessibly declared in or as a parent of this derived type"_warn_en_US; warning = common::UsageWarning::RedeclaredInaccessibleComponent; } + } else if (extends) { + msg = + "Type cannot be extended as it has a component named '%s'"_err_en_US; } else if (prev->test(Symbol::Flag::ParentComp)) { msg = "'%s' is a parent type of this type and so cannot be a component"_err_en_US; @@ -7771,6 +7748,7 @@ void ConstructVisitor::Post(const parser::TypeGuardStmt::Guard &x) { SetTypeFromAssociation(*symbol); } else if (const auto *type{GetDeclTypeSpec()}) { symbol->SetType(*type); + symbol->get().set_isTypeGuard(); } SetAttrsFromAssociation(*symbol); } @@ -9889,6 +9867,21 @@ void ResolveNamesVisitor::ResolveSpecificationParts(ProgramTree &node) { object->set_cudaDataAttr(common::CUDADataAttr::Device); } } + // Main program local objects usually don't have an implied SAVE attribute, + // as one might think, but in the exceptional case of a derived type + // local object that contains a coarray, we have to mark it as an + // implied SAVE so that 
evaluate::IsSaved() will return true. + if (node.scope()->kind() == Scope::Kind::MainProgram) { + if (const auto *object{symbol.detailsIf()}) { + if (const DeclTypeSpec * type{object->type()}) { + if (const DerivedTypeSpec * derived{type->AsDerived()}) { + if (!IsSaved(symbol) && FindCoarrayPotentialComponent(*derived)) { + SetImplicitAttr(symbol, Attr::SAVE); + } + } + } + } + } } } diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp index 61982295f323a..32eb6c2c5a188 100644 --- a/flang/lib/Semantics/symbol.cpp +++ b/flang/lib/Semantics/symbol.cpp @@ -155,6 +155,7 @@ void EntityDetails::set_type(const DeclTypeSpec &type) { void AssocEntityDetails::set_rank(int rank) { rank_ = rank; } void AssocEntityDetails::set_IsAssumedSize() { rank_ = isAssumedSize; } void AssocEntityDetails::set_IsAssumedRank() { rank_ = isAssumedRank; } +void AssocEntityDetails::set_isTypeGuard(bool yes) { isTypeGuard_ = yes; } void EntityDetails::ReplaceType(const DeclTypeSpec &type) { type_ = &type; } ObjectEntityDetails::ObjectEntityDetails(EntityDetails &&d) diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 7544731a682ec..5e58a0c75c77b 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -633,9 +633,9 @@ const EquivalenceSet *FindEquivalenceSet(const Symbol &symbol) { } bool IsOrContainsEventOrLockComponent(const Symbol &original) { - const Symbol &symbol{ResolveAssociations(original)}; - if (const auto *details{symbol.detailsIf()}) { - if (const DeclTypeSpec * type{details->type()}) { + const Symbol &symbol{ResolveAssociations(original, /*stopAtTypeGuard=*/true)}; + if (evaluate::IsVariable(symbol)) { + if (const DeclTypeSpec * type{symbol.GetType()}) { if (const DerivedTypeSpec * derived{type->AsDerived()}) { return IsEventTypeOrLockType(derived) || FindEventOrLockPotentialComponent(*derived); @@ -849,7 +849,7 @@ static const Symbol *HasImpureFinal( } const Symbol *HasImpureFinal(const Symbol &original, 
std::optional rank) { - const Symbol &symbol{ResolveAssociations(original)}; + const Symbol &symbol{ResolveAssociations(original, /*stopAtTypeGuard=*/true)}; if (symbol.has()) { if (const DeclTypeSpec * symType{symbol.GetType()}) { if (const DerivedTypeSpec * derived{symType->AsDerived()}) { @@ -1386,6 +1386,13 @@ template class ComponentIterator; template class ComponentIterator; template class ComponentIterator; +PotentialComponentIterator::const_iterator FindCoarrayPotentialComponent( + const DerivedTypeSpec &derived) { + PotentialComponentIterator potentials{derived}; + return std::find_if(potentials.begin(), potentials.end(), + [](const Symbol &symbol) { return evaluate::IsCoarray(symbol); }); +} + UltimateComponentIterator::const_iterator FindCoarrayUltimateComponent( const DerivedTypeSpec &derived) { UltimateComponentIterator ultimates{derived}; diff --git a/flang/lib/Support/Fortran-features.cpp b/flang/lib/Support/Fortran-features.cpp index bbeb4b15a0486..4bc92f3924ef6 100644 --- a/flang/lib/Support/Fortran-features.cpp +++ b/flang/lib/Support/Fortran-features.cpp @@ -84,8 +84,10 @@ LanguageFeatureControl::LanguageFeatureControl() { warnUsage_.set(UsageWarning::UndefinedFunctionResult); warnUsage_.set(UsageWarning::UselessIomsg); warnUsage_.set(UsageWarning::UnsignedLiteralTruncation); + warnUsage_.set(UsageWarning::NullActualForDefaultIntentAllocatable); // New warnings, on by default warnLanguage_.set(LanguageFeature::SavedLocalInSpecExpr); + warnLanguage_.set(LanguageFeature::NullActualForAllocatable); } // Ignore case and any inserted punctuation (like '-'/'_') diff --git a/flang/test/Driver/config-file.f90 b/flang/test/Driver/config-file.f90 index 6991fda9bd483..2ad9b71d01613 100644 --- a/flang/test/Driver/config-file.f90 +++ b/flang/test/Driver/config-file.f90 @@ -77,13 +77,13 @@ ! CHECK-LINKING-LIBOMP-GOES-AFTER: "--as-needed" "{{.*}}-{{.*}}.o" "-lmylib" "foo.a" "-lm" "-Bstatic" "-lhappy" "-Bdynamic" {{.*}}"-lomp" ! 
CHECK-NOLINKING: Configuration file: {{.*}}Inputs{{.}}config-l.cfg ! CHECK-NOLINKING: "-ffast-math" -! CHECK-NOLINKING-NO: "-lm" "-Bstatic" "-lhappy" "-Bdynamic" +! CHECK-NOLINKING-NOT: "-lm" "-Bstatic" "-lhappy" "-Bdynamic" ! CHECK-NOLINKING-OPENMP: Configuration file: {{.*}}Inputs{{.}}config-l.cfg ! CHECK-NOLINKING-OPENMP: "-ffast-math" {{.*}}"-fopenmp" -! CHECK-NOLINKING-OPENMP-NO: "-lm" "-Bstatic" "-lhappy" "-Bdynamic" {{.}}"-lomp" +! CHECK-NOLINKING-OPENMP-NOT: "-lm" "-Bstatic" "-lhappy" "-Bdynamic" {{.}}"-lomp" ! CHECK-LINKING-MSVC: Configuration file: {{.*}}Inputs{{.}}config-l.cfg ! CHECK-LINKING-MSVC: "-ffast-math" ! CHECK-LINKING-MSVC: "--as-needed" "{{.*}}-{{.*}}.o" "mylib.lib" "foo.lib" "m.lib" "-Bstatic" "happy.lib" "-Bdynamic" ! CHECK-NOLINKING-MSVC: Configuration file: {{.*}}Inputs{{.}}config-l.cfg ! CHECK-NOLINKING-MSVC: "-ffast-math" -! CHECK-NOLINKING-MSVC-NO: "m.lib" "-Bstatic" "happy.lib" "-Bdynamic" +! CHECK-NOLINKING-MSVC-NOT: "m.lib" "-Bstatic" "happy.lib" "-Bdynamic" diff --git a/flang/test/Fir/Todo/coordinate_of_2.fir b/flang/test/Fir/Todo/coordinate_of_2.fir index 7ceead8de5279..759f2eab097e9 100644 --- a/flang/test/Fir/Todo/coordinate_of_2.fir +++ b/flang/test/Fir/Todo/coordinate_of_2.fir @@ -4,7 +4,6 @@ // `!fir.coordinate_of` - `!fir.array` inside "boxed" `!fir.type` func.func @coordinate_box_array_inside_derived(%arg0: !fir.box, field_2:i32}>>, %arg1 : index) { - %idx0 = arith.constant 0 : i32 - %q = fir.coordinate_of %arg0, %idx0, %arg1 : (!fir.box, field_2:i32}>>, i32, index) -> !fir.ref + %q = fir.coordinate_of %arg0, field_1, %arg1 : (!fir.box, field_2:i32}>>, index) -> !fir.ref return } diff --git a/flang/test/Fir/Todo/coordinate_of_3.fir b/flang/test/Fir/Todo/coordinate_of_3.fir index 305422052be27..aff936d0e1a41 100644 --- a/flang/test/Fir/Todo/coordinate_of_3.fir +++ b/flang/test/Fir/Todo/coordinate_of_3.fir @@ -4,7 +4,6 @@ // `fir.coordinate_of` - `fir.array` inside "boxed" `!fir.type}` (i.e. 
nested `!fir.type`) func.func @coordinate_box_array_inside_derived(%arg0: !fir.box}>}>>, %arg1 : index) { - %idx0 = arith.constant 0 : i32 - %q = fir.coordinate_of %arg0, %idx0, %idx0, %arg1 : (!fir.box}>}>>, i32, i32, index) -> !fir.ref + %q = fir.coordinate_of %arg0, field_1, field_2, %arg1 : (!fir.box}>}>>, index) -> !fir.ref return } diff --git a/flang/test/Fir/abstract-results-bindc.fir b/flang/test/Fir/abstract-results-bindc.fir index b2efffea31fb1..695098a82098e 100644 --- a/flang/test/Fir/abstract-results-bindc.fir +++ b/flang/test/Fir/abstract-results-bindc.fir @@ -54,7 +54,6 @@ func.func @test_return_cptr(%x: !fir.ref) { // CHECK-LABEL: func.func @test_return_cptr( // CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>) { // CHECK: %[[VAL_1:.*]] = fir.call @return_cptr() : () -> !fir.ref -// CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref // CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_1]] : (!fir.ref) -> i64 // CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref diff --git a/flang/test/Fir/abstract-results.fir b/flang/test/Fir/abstract-results.fir index 93e63dc657f0c..d112ec66f434d 100644 --- a/flang/test/Fir/abstract-results.fir +++ b/flang/test/Fir/abstract-results.fir @@ -54,18 +54,17 @@ func.func private @arrayfunc_callee(%n : index) -> !fir.array { // FUNC-BOX-SAME: %[[box:.*]]: !fir.box>, %[[v:.*]]: f32) { func.func @derivedfunc_callee(%v: f32) -> !fir.type { %buffer = fir.alloca !fir.type - %0 = fir.field_index x, !fir.type - %1 = fir.coordinate_of %buffer, %0 : (!fir.ref>, !fir.field) -> !fir.ref + %1 = fir.coordinate_of %buffer, x : (!fir.ref>) -> !fir.ref fir.store %v to %1 : !fir.ref %res = fir.load %buffer : !fir.ref> return %res : !fir.type - // FUNC-REF: %[[coor:.*]] = fir.coordinate_of 
%[[buffer]], %{{.*}} : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-REF: %[[coor:.*]] = fir.coordinate_of %[[buffer]], x : (!fir.ref>) -> !fir.ref // FUNC-REF: fir.store %[[v]] to %[[coor]] : !fir.ref // FUNC-REF: return // FUNC-BOX: %[[buffer:.*]] = fir.box_addr %[[box]] : (!fir.box>) -> !fir.ref> - // FUNC-BOX: %[[coor:.*]] = fir.coordinate_of %[[buffer]], %{{.*}} : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-BOX: %[[coor:.*]] = fir.coordinate_of %[[buffer]], x : (!fir.ref>) -> !fir.ref // FUNC-BOX: fir.store %[[v]] to %[[coor]] : !fir.ref // FUNC-BOX: return } @@ -95,14 +94,12 @@ func.func @retcptr() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__addres return %1 : !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> // FUNC-REF: %[[ALLOC:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "rec", uniq_name = "_QFrecErec"} - // FUNC-REF: %[[FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-REF: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], %[[FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-REF: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], __address : (!fir.ref>) -> !fir.ref // FUNC-REF: %[[VAL:.*]] = fir.load %[[ADDR]] : !fir.ref // FUNC-REF: %[[CAST:.*]] = fir.convert %[[VAL]] : (i64) -> !fir.ref // FUNC-REF: return %[[CAST]] : !fir.ref // FUNC-BOX: %[[ALLOC:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "rec", uniq_name = "_QFrecErec"} - // FUNC-BOX: %[[FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-BOX: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], %[[FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-BOX: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], __address : (!fir.ref>) -> !fir.ref // FUNC-BOX: %[[VAL:.*]] = fir.load %[[ADDR]] : !fir.ref // FUNC-BOX: %[[CAST:.*]] = fir.convert %[[VAL]] : (i64) -> !fir.ref 
// FUNC-BOX: return %[[CAST]] : !fir.ref @@ -256,20 +253,17 @@ func.func @call_chararrayfunc() { func.func @_QPtest_return_cptr() { %0 = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = ".result"} %1 = fir.call @retcptr() : () -> i64 - %2 = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - %3 = fir.coordinate_of %0, %2 : (!fir.ref>, !fir.field) -> !fir.ref + %3 = fir.coordinate_of %0, __address : (!fir.ref>) -> !fir.ref fir.store %1 to %3 : !fir.ref return // FUNC-REF: %[[ALLOC:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = ".result"} // FUNC-REF: %[[VAL:.*]] = fir.call @retcptr() : () -> i64 - // FUNC-REF: %[[FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-REF: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], %[[FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-REF: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], __address : (!fir.ref>) -> !fir.ref // FUNC-REF: fir.store %[[VAL]] to %[[ADDR]] : !fir.ref // FUNC-BOX: %[[ALLOC:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = ".result"} // FUNC-BOX: %[[VAL:.*]] = fir.call @retcptr() : () -> i64 - // FUNC-BOX: %[[FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-BOX: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], %[[FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-BOX: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], __address : (!fir.ref>) -> !fir.ref // FUNC-BOX: fir.store %[[VAL]] to %[[ADDR]] : !fir.ref } @@ -384,16 +378,14 @@ func.func @test_indirect_calls_return_cptr(%arg0: () -> ()) { // FUNC-REF: %[[VAL_1:.*]] = fir.convert %[[ARG0]] : (() -> ()) -> (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) // FUNC-REF: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (() -> 
!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> (() -> !fir.ref) // FUNC-REF: %[[VAL_3:.*]] = fir.call %[[VAL_2]]() : () -> !fir.ref - // FUNC-REF: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-REF: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-REF: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref // FUNC-REF: %[[CAST:.*]] = fir.convert %[[VAL_3]] : (!fir.ref) -> i64 // FUNC-REF: fir.store %[[CAST]] to %[[VAL_5]] : !fir.ref // FUNC-BOX: %[[VAL_0:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = ".result"} // FUNC-BOX: %[[VAL_1:.*]] = fir.convert %[[ARG0]] : (() -> ()) -> (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) // FUNC-BOX: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> (() -> !fir.ref) // FUNC-BOX: %[[VAL_3:.*]] = fir.call %[[VAL_2]]() : () -> !fir.ref - // FUNC-BOX: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-BOX: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-BOX: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref // FUNC-BOX: %[[CAST:.*]] = fir.convert %[[VAL_3]] : (!fir.ref) -> i64 // FUNC-BOX: fir.store %[[CAST]] to %[[VAL_5]] : !fir.ref } diff --git a/flang/test/Fir/array-value-copy.fir b/flang/test/Fir/array-value-copy.fir index 58db8b3ae4cd2..3d44407b5fcf8 100644 --- a/flang/test/Fir/array-value-copy.fir +++ b/flang/test/Fir/array-value-copy.fir @@ -333,8 +333,7 @@ func.func @array_of_types() { %c1_i64 = arith.constant 1 : i64 %9 = arith.subi %8, %c1_i64 : i64 %10 = fir.coordinate_of %1, %9 : (!fir.ref}>>>, i64) -> !fir.ref}>> - %11 = fir.field_index i, 
!fir.type<_QTd{i:!fir.array<10xi32>}> - %12 = fir.coordinate_of %10, %11 : (!fir.ref}>>, !fir.field) -> !fir.ref> + %12 = fir.coordinate_of %10, i : (!fir.ref}>>) -> !fir.ref> %c10 = arith.constant 10 : index %13 = arith.addi %c1_0, %c10 : index %14 = arith.subi %13, %c1_0 : index @@ -363,7 +362,7 @@ func.func @array_of_types() { // CHECK-LABEL: func @array_of_types() { // CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} -> index { -// CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg2 = %17) -> (!fir.array<10xi32>) { +// CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%{{.*}} = %{{.*}}) -> (!fir.array<10xi32>) { // CHECK-NOT: %{{.*}} = fir.array_update // CHECK: %[[COOR0:.*]] = fir.array_coor %{{.*}}(%{{.*}}) [%{{.*}}] %{{.*}} : (!fir.ref>, !fir.shape<1>, !fir.slice<1>, index) -> !fir.ref // CHECK: fir.store %{{.*}} to %[[COOR0]] : !fir.ref @@ -482,11 +481,9 @@ func.func @array_fetch_derived_type(%0 : !fir.ref}>>>) { // CHECK: %{{.*}} = fir.do_loop -// CHECK: %[[FIELD_MT:.*]] = fir.field_index mt, !fir.type<_QTu{mt:!fir.type<_QTt{mem:i32}>}> -// CHECK: %[[FIELD_MEM:.*]] = fir.field_index mem, !fir.type<_QTt{mem:i32}> // CHECK-NOT: %{{.*}} = fir.array_fetch // CHECK: %[[COOR0:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}} : (!fir.ref}>>>, !fir.shape<1>, index) -> !fir.ref}>> -// CHECK: %[[COOR_OF:.*]] = fir.coordinate_of %[[COOR0]], %[[FIELD_MT]], %[[FIELD_MEM]] : (!fir.ref}>>, !fir.field, !fir.field) -> !fir.ref +// CHECK: %[[COOR_OF:.*]] = fir.coordinate_of %[[COOR0]], mt, mem : (!fir.ref}>>) -> !fir.ref // CHECK: %{{.*}} = fir.load %[[COOR_OF]] : !fir.ref // ----- diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 7cdcd2a10e975..a429a14518182 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -948,13 +948,11 @@ 
func.func @omp_map_info_descriptor_type_conversion(%arg0 : !fir.ref,int:i32}>>) { // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ARG_0]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFderived_type", (f32, array<10 x i32>, i32)> - %0 = fir.field_index int, !fir.type<_QFderived_type{real:f32,array:!fir.array<10xi32>,int:i32}> - %1 = fir.coordinate_of %arg0, %0 : (!fir.ref,int:i32}>>, !fir.field) -> !fir.ref + %1 = fir.coordinate_of %arg0, int : (!fir.ref,int:i32}>>) -> !fir.ref // CHECK: %[[MAP_MEMBER_1:.*]] = omp.map.info var_ptr(%[[GEP]] : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "dtype%int"} %2 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "dtype%int"} // CHECK: %[[GEP_2:.*]] = llvm.getelementptr %[[ARG_0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFderived_type", (f32, array<10 x i32>, i32)> - %3 = fir.field_index real, !fir.type<_QFderived_type{real:f32,array:!fir.array<10xi32>,int:i32}> - %4 = fir.coordinate_of %arg0, %3 : (!fir.ref,int:i32}>>, !fir.field) -> !fir.ref + %4 = fir.coordinate_of %arg0, real : (!fir.ref,int:i32}>>) -> !fir.ref // CHECK: %[[MAP_MEMBER_2:.*]] = omp.map.info var_ptr(%[[GEP_2]] : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "dtype%real"} %5 = omp.map.info var_ptr(%4 : !fir.ref, f32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "dtype%real"} // CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.struct<"_QFderived_type", (f32, array<10 x i32>, i32)>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_1]], %[[MAP_MEMBER_2]] : [2], [0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "dtype", partial_map = true} @@ -973,16 +971,13 @@ func.func @omp_map_info_derived_type_explicit_member_conversion(%arg0 : !fir.ref func.func @omp_map_info_nested_derived_type_explicit_member_conversion(%arg0 : !fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>) { 
// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ARG_0]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFTtop_layer", (array<10 x i32>, struct<"_QFTbottom_layer", (array<10 x f32>, f64)>, i32)> - %0 = fir.field_index nested, !fir.type<_QFTtop_layer{array_i:!fir.array<10xi32>,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}> - %1 = fir.coordinate_of %arg0, %0 : (!fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>, !fir.field) -> !fir.ref,i2:f64}>> + %1 = fir.coordinate_of %arg0, nested : (!fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>) -> !fir.ref,i2:f64}>> // CHECK: %[[GEP_2:.*]] = llvm.getelementptr %[[GEP]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFTbottom_layer", (array<10 x f32>, f64)> - %2 = fir.field_index i2, !fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}> - %3 = fir.coordinate_of %1, %2 : (!fir.ref,i2:f64}>>, !fir.field) -> !fir.ref + %3 = fir.coordinate_of %1, i2 : (!fir.ref,i2:f64}>>) -> !fir.ref // CHECK: %[[MAP_MEMBER_1:.*]] = omp.map.info var_ptr(%[[GEP_2]] : !llvm.ptr, f64) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %4 = omp.map.info var_ptr(%3 : !fir.ref, f64) map_clauses(tofrom) capture(ByRef) -> !fir.ref // CHECK: %[[GEP_3:.*]] = llvm.getelementptr %[[ARG_0]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFTtop_layer", (array<10 x i32>, struct<"_QFTbottom_layer", (array<10 x f32>, f64)>, i32)> - %5 = fir.field_index k, !fir.type<_QFTtop_layer{array_i:!fir.array<10xi32>,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}> - %6 = fir.coordinate_of %arg0, %5 : (!fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>, !fir.field) -> !fir.ref + %6 = fir.coordinate_of %arg0, k : (!fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>) -> !fir.ref // CHECK: %[[MAP_MEMBER_2:.*]] = omp.map.info var_ptr(%[[GEP_3]] : !llvm.ptr, 
i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %7 = omp.map.info var_ptr(%6 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref // CHECK: %[[PARENT_MAP:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.struct<"_QFTtop_layer", (array<10 x i32>, struct<"_QFTbottom_layer", (array<10 x f32>, f64)>, i32)>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_1]], %[[MAP_MEMBER_2]] : [1, 1], [2] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true} @@ -1131,7 +1126,7 @@ func.func @map_dtype_alloca_mem(%arg0 : !fir.ref !llvm.ptr, [[STRUCT_TY:!llvm.struct<"_QFRecTy", \(f32, struct<\(ptr, i64, i32, i8, i8, i8, i8\)>, array<10 x i32>, f32, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32\)>]] - %1 = fir.coordinate_of %arg0, %c4 : (!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>, index) -> !fir.ref>>> + %1 = fir.coordinate_of %arg0, array_j : (!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>) -> !fir.ref>>> // CHECK: %[[BADDR_GEP:.*]] = llvm.getelementptr %[[GEP]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[STRUCT_TY2:!llvm.struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>]] %2 = fir.box_offset %1 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> // CHECK: %[[MAP_MEMBER_BADDR:.*]] = omp.map.info var_ptr(%[[GEP]] : !llvm.ptr, i32) var_ptr_ptr(%[[BADDR_GEP]] : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr @@ -1165,7 +1160,7 @@ func.func @map_dtype_alloca_mem2(%arg0 : !fir.ref !llvm.ptr, [[DESC_TY]] // CHECK: %[[LOAD_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr // CHECK: %[[GEP_DTYPE_MEMBER:.*]] = llvm.getelementptr %[[LOAD_DTYPE_BADDR]][0, 4] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY:!llvm.struct<"_QFRecTy", \(f32, struct<\(ptr, i64, i32, i8, i8, i8, i8\)>, array<10 x i32>, f32, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32\)>]] - %2 = fir.coordinate_of %1, %c4 : 
(!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>, index) -> !fir.ref>>> + %2 = fir.coordinate_of %1, array_j : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>) -> !fir.ref>>> // CHECK: %[[DTYPE_MEMBER_BADDR:.*]] = llvm.getelementptr %[[GEP_DTYPE_MEMBER]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY2:!llvm.struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>]] %3 = fir.box_offset %2 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> // CHECK: %[[MAP_MEMBER_BADDR:.*]] = omp.map.info var_ptr(%[[GEP_DTYPE_MEMBER]] : !llvm.ptr, i32) var_ptr_ptr(%[[DTYPE_MEMBER_BADDR]] : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr @@ -1177,7 +1172,7 @@ func.func @map_dtype_alloca_mem2(%arg0 : !fir.ref !llvm.ptr, [[DESC_TY]] // CHECK: %[[LOAD_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr // CHECK: %[[GEP_DTYPE_REGULAR_MEMBER:.*]] = llvm.getelementptr %[[LOAD_DTYPE_BADDR]][0, 5] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY]] - %7 = fir.coordinate_of %6, %c5 : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>, index) -> !fir.ref + %7 = fir.coordinate_of %6, k : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>) -> !fir.ref // CHECK: %[[MAP_REGULAR_MEMBER:.*]] = omp.map.info var_ptr(%[[GEP_DTYPE_REGULAR_MEMBER]] : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %8 = omp.map.info var_ptr(%7 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref // CHECK: %[[GEP_DTYPE_BADDR:.*]] = llvm.getelementptr %[[ARG_0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]] @@ -1213,9 +1208,9 @@ func.func @map_nested_dtype_alloca_mem(%arg0 : !fir.ref !llvm.ptr, [[DESC_TY]] // CHECK: %[[LOAD_GEP_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr // CHECK: %[[LOAD_NESTED_DTYPE:.*]] = llvm.getelementptr %[[LOAD_GEP_DTYPE_BADDR]][0, 6] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY:!llvm.struct<"_QFRecTy", \(f32, 
struct<\(ptr, i64, i32, i8, i8, i8, i8\)>, array<10 x i32>, f32, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32, struct<"_QFRecTy2", \(f32, array<10 x i32>, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32\)>\)>]] - %2 = fir.coordinate_of %1, %c6 : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>, index) -> !fir.ref,array_k:!fir.box>>,k:i32}>> + %2 = fir.coordinate_of %1, nest : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>) -> !fir.ref,array_k:!fir.box>>,k:i32}>> // CHECK: %[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER:.*]] = llvm.getelementptr %[[LOAD_NESTED_DTYPE]][0, 2] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY2:!llvm.struct<"_QFRecTy2", \(f32, array<10 x i32>, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32\)>]] - %3 = fir.coordinate_of %2, %c2 : (!fir.ref,array_k:!fir.box>>,k:i32}>>, index) -> !fir.ref>>> + %3 = fir.coordinate_of %2, array_k : (!fir.ref,array_k:!fir.box>>,k:i32}>>) -> !fir.ref>>> // CHECK: %[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER_BADDR:.*]] = llvm.getelementptr %[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY2:!llvm.struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>]] %4 = fir.box_offset %3 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> // CHECK: %[[MAP_NESTED_MEMBER_BADDR:.*]] = omp.map.info var_ptr(%[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER]] : !llvm.ptr, i32) var_ptr_ptr(%[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER_BADDR]] : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr @@ -1227,9 +1222,9 @@ func.func @map_nested_dtype_alloca_mem(%arg0 : !fir.ref !llvm.ptr %7 = fir.load %arg0 : 
!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>> // CHECK: %[[LOAD_NESTED_DTYPE:.*]] = llvm.getelementptr %[[LOAD_GEP_DTYPE_BADDR]][0, 6] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY]] - %8 = fir.coordinate_of %7, %c6 : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>, index) -> !fir.ref,array_k:!fir.box>>,k:i32}>> + %8 = fir.coordinate_of %7, nest : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>) -> !fir.ref,array_k:!fir.box>>,k:i32}>> // CHECK: %[[NESTED_DTYPE_REGULAR_MEMBER_GEP:.*]] = llvm.getelementptr %[[LOAD_NESTED_DTYPE]][0, 3] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY2]] - %9 = fir.coordinate_of %8, %c3 : (!fir.ref,array_k:!fir.box>>,k:i32}>>, index) -> !fir.ref + %9 = fir.coordinate_of %8, k : (!fir.ref,array_k:!fir.box>>,k:i32}>>) -> !fir.ref // CHECK: %[[MAP_REGULAR_NESTED_MEMBER:.*]] = omp.map.info var_ptr(%[[NESTED_DTYPE_REGULAR_MEMBER_GEP]] : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %10 = omp.map.info var_ptr(%9 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref // CHECK: %[[DTYPE_BADDR_GEP:.*]] = llvm.getelementptr %[[ARG_0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]] @@ -1258,9 +1253,9 @@ func.func @map_nested_dtype_alloca_mem2(%arg0 : !fir.ref !llvm.ptr, [[REC_TY:!llvm.struct<"_QFRecTy", \(f32, struct<\(ptr, i64, i32, i8, i8, i8, i8\)>, array<10 x i32>, f32, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32, struct<"_QFRecTy2", \(f32, array<10 x i32>, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32\)>\)>]] - %1 = fir.coordinate_of %arg0, %c6 : 
(!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>, index) -> !fir.ref,array_k:!fir.box>>,k:i32}>> + %1 = fir.coordinate_of %arg0, nest : (!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>) -> !fir.ref,array_k:!fir.box>>,k:i32}>> // CHECK: %[[NESTED_ALLOCATABLE_MEMBER_GEP:.*]] = llvm.getelementptr %[[NESTED_DTYPE_MEMBER_GEP]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFRecTy2", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> - %2 = fir.coordinate_of %1, %c2 : (!fir.ref,array_k:!fir.box>>,k:i32}>>, index) -> !fir.ref>>> + %2 = fir.coordinate_of %1, array_k : (!fir.ref,array_k:!fir.box>>,k:i32}>>) -> !fir.ref>>> // CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP:.*]] = llvm.getelementptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY2]] %3 = fir.box_offset %2 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> // CHECK: %[[MAP_NESTED_ALLOCATABLE_MEMBER_BADDR:.*]] = omp.map.info var_ptr(%[[NESTED_ALLOCATABLE_MEMBER_GEP]] : !llvm.ptr, i32) var_ptr_ptr(%[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]] : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr @@ -1282,10 +1277,8 @@ func.func @map_nested_dtype_alloca_mem2(%arg0 : !fir.ref { // CHECK: ^bb0(%[[VAL_0:.*]]: !llvm.ptr): ^bb0(%0: !fir.ref>): -// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(0 : i32) : i32 - %1 = fir.field_index data, !fir.type<_QFdeclare_mapperTmy_type{data:i32}> // CHECK: %[[VAL_2:.*]] = llvm.getelementptr %[[VAL_0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFdeclare_mapperTmy_type", (i32)> - %2 = fir.coordinate_of %0, %1 : (!fir.ref>, !fir.field) -> !fir.ref + %2 = fir.coordinate_of %0, data : (!fir.ref>) -> !fir.ref // CHECK: %[[VAL_3:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !llvm.ptr, 
i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var%[[VAL_4:.*]]"} %3 = omp.map.info var_ptr(%2 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "var%data"} // CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !llvm.ptr, !llvm.struct<"_QFdeclare_mapperTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) members(%[[VAL_3]] : [0] : !llvm.ptr) -> !llvm.ptr {name = "var", partial_map = true} diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 8727c0ab08e70..c7037019ee701 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -2575,13 +2575,11 @@ func.func @coordinate_box_complex(%arg0: !fir.box>) { // 2. BOX TYPE (objects wrapped in `fir.box`) // Derived type - basic case (1 index) func.func @coordinate_box_derived_1(%arg0: !fir.box>) { - %idx = fir.field_index field_2, !fir.type - %q = fir.coordinate_of %arg0, %idx : (!fir.box>, !fir.field) -> !fir.ref + %q = fir.coordinate_of %arg0, field_2 : (!fir.box>) -> !fir.ref return } // CHECK-LABEL: llvm.func @coordinate_box_derived_1 // CHECK-SAME: %[[BOX:.*]]: !llvm.ptr) -// CHECK: %[[COORDINATE:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[DERIVED_ADDR:.*]] = llvm.getelementptr %[[BOX]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i64>)> // CHECK: %[[DERIVED_VAL:.*]] = llvm.load %[[DERIVED_ADDR]] : !llvm.ptr -> !llvm.ptr // CHECK: %[[SUBOBJECT_ADDR:.*]] = llvm.getelementptr %[[DERIVED_VAL]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"derived_1", (i32, i32)> @@ -2589,16 +2587,12 @@ func.func @coordinate_box_derived_1(%arg0: !fir.box, field_2:i32}>>) { - %idx0 = fir.field_index field_1, !fir.type, field_2:i32}> - %idx1 = fir.field_index inner2, !fir.type - %q = fir.coordinate_of %arg0, %idx0, %idx1 : (!fir.box, field_2:i32}>>, !fir.field, !fir.field) -> !fir.ref + %q = fir.coordinate_of %arg0, field_1, inner2 : 
(!fir.box, field_2:i32}>>) -> !fir.ref return } // CHECK-LABEL: llvm.func @coordinate_box_derived_2 // CHECK-SAME: (%[[BOX:.*]]: !llvm.ptr) -// CHECK-NEXT: %[[C0_0:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK-NEXT: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[DERIVED_ADDR:.*]] = llvm.getelementptr %[[BOX]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i{{.*}}, i{{.*}}32, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i64>)> // CHECK-NEXT: %[[DERIVED_VAL:.*]] = llvm.load %[[DERIVED_ADDR]] : !llvm.ptr -> !llvm.ptr // CHECK-NEXT: %[[ANOTHER_DERIVED_ADDR:.*]] = llvm.getelementptr %[[DERIVED_VAL]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"derived_2", (struct<"another_derived", (i32, f32)>, i32)> @@ -2683,8 +2677,7 @@ func.func @coordinate_box_array_2d(%arg0: !fir.box>, % // 4. BOX TYPE - `fir.derived` inside `fir.array` func.func @coordinate_box_derived_inside_array(%arg0: !fir.box>>, %arg1 : index) { - %idx0 = fir.field_index field_2, !fir.type - %q = fir.coordinate_of %arg0, %arg1, %idx0 : (!fir.box>>, index, !fir.field) -> !fir.ref + %q = fir.coordinate_of %arg0, %arg1, field_2 : (!fir.box>>, index) -> !fir.ref return } // CHECK-LABEL: llvm.func @coordinate_box_derived_inside_array( @@ -2761,8 +2754,7 @@ func.func @coordinate_array_known_size_2d_get_array(%arg0: !fir.ref>) { - %idx = fir.field_index field_2, !fir.type - %q = fir.coordinate_of %arg0, %idx : (!fir.ref>, !fir.field) -> !fir.ref + %q = fir.coordinate_of %arg0, field_2 : (!fir.ref>) -> !fir.ref return } // CHECK-LABEL: llvm.func @coordinate_ref_derived( @@ -2774,9 +2766,7 @@ func.func @coordinate_ref_derived(%arg0: !fir.ref, field_2:i32}>>) { - %idx0 = fir.field_index field_1, !fir.type, field_2:i32}> - %idx1 = fir.field_index inner2, !fir.type - %q = fir.coordinate_of %arg0, %idx0, %idx1 : (!fir.ref, field_2:i32}>>, !fir.field, !fir.field) -> !fir.ref + %q = fir.coordinate_of %arg0, field_1, inner2 : (!fir.ref, field_2:i32}>>) -> !fir.ref return } // CHECK-LABEL: 
llvm.func @coordinate_ref_derived_nested( @@ -2788,15 +2778,15 @@ func.func @coordinate_ref_derived_nested(%arg0: !fir.ref>) { +func.func @test_coordinate_of_char(%arr : !fir.ref>) { %1 = arith.constant 10 : i32 - %2 = fir.coordinate_of %arr, %1 : (!fir.ref>, i32) -> !fir.ref> + %2 = fir.coordinate_of %arr, %1 : (!fir.ref>, i32) -> !fir.ref> return } // CHECK-LABEL: llvm.func @test_coordinate_of_char( // CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) { // CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(10 : i32) : i32 -// CHECK: %[[VAL_2:.*]] = llvm.getelementptr %[[VAL_0]]{{\[}}%[[VAL_1]]] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<2 x i80> +// CHECK: %[[VAL_2:.*]] = llvm.getelementptr %[[VAL_0]]{{\[}}0, %[[VAL_1]]] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i16> // CHECK: llvm.return // CHECK: } diff --git a/flang/test/Fir/dispatch.f90 b/flang/test/Fir/dispatch.f90 index 2ffdcd5b1884d..2b1ae225986ca 100644 --- a/flang/test/Fir/dispatch.f90 +++ b/flang/test/Fir/dispatch.f90 @@ -200,15 +200,12 @@ program test_type_to_class ! Check dynamic dispatch equal to `call p%display2()` with binding index = 2. ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c2{{.*}} : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) ! CHECK: fir.call %[[FUNC_PTR]](%[[ARG_DECL]]#0) : (!fir.class>) -> () @@ -216,15 +213,12 @@ program test_type_to_class ! Check dynamic dispatch equal to `call p%display1()` with binding index = 1. ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c1{{.*}} : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) ! CHECK: fir.call %[[FUNC_PTR]](%[[ARG_DECL]]#0) : (!fir.class>) -> () @@ -232,15 +226,12 @@ program test_type_to_class ! Check dynamic dispatch equal to `call p%aproc()` with binding index = 0. ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c0{{.*}}: (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) ! CHECK: fir.call %[[FUNC_PTR]](%[[ARG_DECL]]#0) : (!fir.class>) -> () @@ -248,15 +239,12 @@ program test_type_to_class ! Check dynamic dispatch of a function with result. ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c3 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> i32) ! CHECK: %[[RES:.*]] = fir.call %[[FUNC_PTR]](%[[ARG_DECL]]#0) : (!fir.class>) -> i32 @@ -264,15 +252,12 @@ program test_type_to_class ! Check dynamic dispatch of call with passed-object and additional argument ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c6{{.*}} : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]], !fir.ref) -> ()) ! CHECK: fir.call %[[FUNC_PTR]](%[[ARG_DECL]]#0, %{{.*}}) : (!fir.class>, !fir.ref) -> () @@ -280,30 +265,24 @@ program test_type_to_class ! Check dynamic dispatch of a call with NOPASS ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#1 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref>>>> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : (!fir.box>> ! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c4{{.*}} : (!fir.ptr>>, index) -> !fir.ref> -! 
CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (() -> ()) ! CHECK: fir.call %[[FUNC_PTR]]() : () -> () ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c5{{.*}} : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! 
CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> ((!fir.ref, [[CLASS]]) -> ()) ! CHECK: fir.call %[[FUNC_PTR]](%{{.*}}, %[[ARG_DECL]]#0) : (!fir.ref, [[CLASS]]) -> () diff --git a/flang/test/Fir/field-index.fir b/flang/test/Fir/field-index.fir index 4f2551b380d55..55d173201f29a 100644 --- a/flang/test/Fir/field-index.fir +++ b/flang/test/Fir/field-index.fir @@ -1,4 +1,4 @@ -// Test fir.field_index llvm code generation +// Test llvm code generation of fir.coordinate_of with field names // RUN: fir-opt %s | tco | FileCheck %s @@ -9,9 +9,8 @@ // CHECK-LABEL: @simple_field // CHECK-SAME: (ptr captures(none) %[[arg0:.*]]) func.func @simple_field(%arg0: !fir.ref>) -> i32 { - %1 = fir.field_index i, !fir.type // CHECK: %[[GEP:.*]] = getelementptr %a, ptr %[[arg0]], i32 0, i32 1 - %2 = fir.coordinate_of %arg0, %1 : (!fir.ref>, !fir.field) -> !fir.ref + %2 = fir.coordinate_of %arg0, i : (!fir.ref>) -> !fir.ref // CHECK: load i32, ptr %[[GEP]] %3 = fir.load %2 : !fir.ref return %3 : i32 @@ -20,10 +19,8 @@ func.func @simple_field(%arg0: !fir.ref>) -> i32 { // CHECK-LABEL: @derived_field // CHECK-SAME: (ptr captures(none) %[[arg0:.*]]) func.func @derived_field(%arg0: !fir.ref}>>) -> i32 { - %1 = fir.field_index some_b, !fir.type}> - %2 = fir.field_index i, !fir.type // CHECK: %[[GEP:.*]] = getelementptr %c, ptr %[[arg0]], i32 0, i32 1, i32 1 - %3 = fir.coordinate_of %arg0, %1, %2 : (!fir.ref}>>, !fir.field, !fir.field) -> !fir.ref + %3 = fir.coordinate_of %arg0, some_b, i : (!fir.ref}>>) -> !fir.ref // CHECK: load 
i32, ptr %[[GEP]] %4 = fir.load %3 : !fir.ref return %4 : i32 diff --git a/flang/test/Fir/pdt.fir b/flang/test/Fir/pdt.fir index ce1fb7a379b8b..a200cd7e7cc03 100644 --- a/flang/test/Fir/pdt.fir +++ b/flang/test/Fir/pdt.fir @@ -49,8 +49,7 @@ func.func @_QQmain(%arg0 : i32, %arg1 : i16) { // CHECK: %[[size:.*]] = call i64 @_QTtP.mem.size(i32 %0, i16 %1) // CHECK: %[[alloc:.*]] = alloca i8, i64 %[[size]] %0 = fir.alloca !fir.type<_QTt(p1:i32,p2:i16){f1:i32,f2:f32}>(%arg0, %arg1 : i32, i16) {name = "_QEvar"} - %1 = fir.field_index f1, !fir.type<_QTt(p1:i32,p2:i16){f1:i32,f2:f32}>(%arg0, %arg1 : i32, i16) - %2 = fir.coordinate_of %0, %1 : (!fir.ref>, !fir.field) -> !fir.ref + %2 = fir.coordinate_of %0, f1 : (!fir.ref>) -> !fir.ref %c4_i32 = arith.constant 4 : i32 fir.store %c4_i32 to %2 : !fir.ref return @@ -102,8 +101,7 @@ func.func @_QPfoo(%arg0 : i32, %arg1 : i32) { // CHECK: %[[size:.*]] = call i64 @_QTt1P.mem.size(i32 %0, i32 %1) // CHECK: %[[alloc:.*]] = alloca i8, i64 %[[size]] %0 = fir.alloca !fir.type<_QTt1(p1:i32,p2:i32){f1:!fir.char<1,?>,f2:!fir.char<1,?>}>(%arg0, %arg1 : i32, i32) - %1 = fir.field_index f2, !fir.type<_QTt1>(%arg0, %arg1 : i32, i32) - //%2 = fir.coordinate_of %0, %1 : (!fir.ref>, !fir.field) -> !fir.ref> + //%2 = fir.coordinate_of %0, f2 : (!fir.ref>) -> !fir.ref> %2 = fir.zero_bits !fir.ref> fir.call @bar(%2) : (!fir.ref>) -> () return diff --git a/flang/test/HLFIR/assign-codegen-derived.fir b/flang/test/HLFIR/assign-codegen-derived.fir index c45c118ed46c5..9bba0d31a6ea6 100644 --- a/flang/test/HLFIR/assign-codegen-derived.fir +++ b/flang/test/HLFIR/assign-codegen-derived.fir @@ -12,8 +12,8 @@ func.func @test_simple(%a: !fir.ref, %b: !fir.ref) { } // CHECK-LABEL: func.func @test_simple( // CHECK-NOT: Destroy -// CHECK: %[[VAL_1:.*]] = fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.ref>, !fir.field) -> !fir.ref -// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.ref>, !fir.field) -> !fir.ref +// CHECK: %[[VAL_1:.*]] = 
fir.coordinate_of %{{.*}}, i : (!fir.ref>) -> !fir.ref +// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %{{.*}}, i : (!fir.ref>) -> !fir.ref // CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_1]] : !fir.ref // CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref diff --git a/flang/test/HLFIR/c_ptr_byvalue.f90 b/flang/test/HLFIR/c_ptr_byvalue.f90 index b2c8da5e22579..f39059a8cfa8d 100644 --- a/flang/test/HLFIR/c_ptr_byvalue.f90 +++ b/flang/test/HLFIR/c_ptr_byvalue.f90 @@ -2,8 +2,7 @@ ! CHECK-LABEL: func.func @_QPtest1() { ! CHECK: %[[VAL_110:.*]]:3 = hlfir.associate %{{.*}} {uniq_name = "adapt.cptrbyval"} : (!hlfir.expr>) -> (!fir.ref>, !fir.ref>, i1) -! CHECK: %[[VAL_111:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_112:.*]] = fir.coordinate_of %[[VAL_110]]#1, %[[VAL_111]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_112:.*]] = fir.coordinate_of %[[VAL_110]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_113:.*]] = fir.load %[[VAL_112]] : !fir.ref ! CHECK: %[[VAL_114:.*]] = fir.convert %[[VAL_113]] : (i64) -> !fir.ref ! CHECK: hlfir.end_associate %[[VAL_110]]#1, %[[VAL_110]]#2 : !fir.ref>, i1 @@ -24,8 +23,7 @@ end subroutine get_expected_f ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "cptr"}) { ! CHECK: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope ! CHECK: %[[VAL_97:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFtest2Ecptr"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[VAL_98:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_99:.*]] = fir.coordinate_of %[[VAL_97]]#0, %[[VAL_98]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_99:.*]] = fir.coordinate_of %[[VAL_97]]#0, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_100:.*]] = fir.load %[[VAL_99]] : !fir.ref ! CHECK: %[[VAL_101:.*]] = fir.convert %[[VAL_100]] : (i64) -> !fir.ref ! 
CHECK: fir.call @get_expected_f(%[[VAL_101]]) proc_attrs fastmath : (!fir.ref) -> () diff --git a/flang/test/HLFIR/designate-codegen-component-refs.fir b/flang/test/HLFIR/designate-codegen-component-refs.fir index 0e9d81f5cff8b..278a7be0e2da1 100644 --- a/flang/test/HLFIR/designate-codegen-component-refs.fir +++ b/flang/test/HLFIR/designate-codegen-component-refs.fir @@ -10,8 +10,7 @@ func.func @test_scalar(%arg0: !fir.ref> // CHECK-LABEL: func.func @test_scalar( // CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>) { // CHECK: %[[VAL_1:.*]] = fir.declare %[[VAL_0]] {uniq_name = "a"} -// CHECK: %[[VAL_2:.*]] = fir.field_index scalar_x, !fir.type -// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], scalar_x : (!fir.ref>) -> !fir.ref func.func @test_array_char_comp_1(%arg0: !fir.ref>}>>) { %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.ref>}>>) -> (!fir.ref>}>>, !fir.ref>}>>) @@ -29,8 +28,7 @@ func.func @test_array_char_comp_1(%arg0: !fir.ref !fir.shape<2> // CHECK: %[[VAL_5:.*]] = arith.constant 5 : index -// CHECK: %[[VAL_6:.*]] = fir.field_index array_char_comp, !fir.type>}> -// CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_6]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> +// CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], array_char_comp : (!fir.ref>}>>) -> !fir.ref>> func.func @test_array(%arg0: !fir.box>>) { %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) @@ -189,8 +187,7 @@ func.func @test_array_comp_slice(%arg0: !fir.ref !fir.shape<2> -// CHECK: %[[VAL_9:.*]] = fir.field_index array_comp, !fir.type}> -// CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_9]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +// CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_1]], array_comp : (!fir.ref}>>) -> !fir.ref> // CHECK: %[[VAL_11:.*]] = fir.array_coor %[[VAL_10]](%[[VAL_4]]) %[[VAL_5]], %[[VAL_6]] : 
(!fir.ref>, !fir.shape<2>, index, index) -> !fir.ref // CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.ref) -> !fir.ref> @@ -219,8 +216,7 @@ func.func @test_array_comp_non_contiguous_slice(%arg0: !fir.ref !fir.shape<2> -// CHECK: %[[VAL_10:.*]] = fir.field_index array_comp, !fir.type}> -// CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_10]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +// CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_1]], array_comp : (!fir.ref}>>) -> !fir.ref> // CHECK: %[[VAL_12:.*]] = fir.undefined index // CHECK: %[[VAL_13:.*]] = fir.slice %[[VAL_5]], %[[VAL_6]], %[[VAL_5]], %[[VAL_7]], %[[VAL_3]], %[[VAL_5]] : (index, index, index, index, index, index) -> !fir.slice<2> // CHECK: %[[VAL_14:.*]] = fir.embox %[[VAL_11]](%[[VAL_4]]) {{\[}}%[[VAL_13]]] : (!fir.ref>, !fir.shape<2>, !fir.slice<2>) -> !fir.box> diff --git a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 b/flang/test/Integration/OpenMP/map-types-and-sizes.f90 index e0221ef254192..70ae353ced214 100644 --- a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 +++ b/flang/test/Integration/OpenMP/map-types-and-sizes.f90 @@ -504,10 +504,10 @@ end subroutine mapType_common_block_members !CHECK-LABEL: define {{.*}} @{{.*}}maptype_derived_type_alloca_{{.*}} !CHECK: %[[ALLOCATABLE_DESC_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 !CHECK: %[[ALLOCA:.*]] = alloca %_QFmaptype_derived_type_allocaTone_layer, i64 1, align 8 +!CHECK: %[[MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_derived_type_allocaTone_layer, ptr %[[ALLOCA]], i32 0, i32 4 !CHECK: %[[DESC_BOUND_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCATABLE_DESC_ALLOCA]], i32 0, i32 7, i64 0, i32 1 !CHECK: %[[DESC_BOUND_ACCESS_LOAD:.*]] = load i64, ptr %[[DESC_BOUND_ACCESS]], align 8 !CHECK: %[[OFFSET_UB:.*]] = sub i64 %[[DESC_BOUND_ACCESS_LOAD]], 1 -!CHECK: %[[MEMBER_ACCESS:.*]] = getelementptr 
%_QFmaptype_derived_type_allocaTone_layer, ptr %[[ALLOCA]], i32 0, i32 4 !CHECK: %[[MEMBER_DESCRIPTOR_BASE_ADDR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[MEMBER_ACCESS]], i32 0, i32 0 !CHECK: %[[CALCULATE_DIM_SIZE:.*]] = sub i64 %[[OFFSET_UB]], 0 !CHECK: %[[RESTORE_OFFSET:.*]] = add i64 %[[CALCULATE_DIM_SIZE]], 1 @@ -549,12 +549,12 @@ end subroutine mapType_common_block_members !CHECK: %[[DTYPE_ARRAY_MEMBER_DESC_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 !CHECK: %[[DTYPE_DESC_ALLOCA_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 !CHECK: %[[DTYPE_DESC_ALLOCA_3:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1, align 8 -!CHECK: %[[ACCESS_DESC_MEMBER_UB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ARRAY_MEMBER_DESC_ALLOCA]], i32 0, i32 7, i64 0, i32 1 -!CHECK: %[[LOAD_DESC_MEMBER_UB:.*]] = load i64, ptr %[[ACCESS_DESC_MEMBER_UB]], align 8 -!CHECK: %[[OFFSET_MEMBER_UB:.*]] = sub i64 %[[LOAD_DESC_MEMBER_UB]], 1 !CHECK: %[[DTYPE_BASE_ADDR_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_2]], i32 0, i32 0 !CHECK: %[[DTYPE_BASE_ADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS]], align 8 !CHECK: %[[DTYPE_ALLOCA_MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_alloca_derived_typeTone_layer, ptr %[[DTYPE_BASE_ADDR_LOAD]], i32 0, i32 4 +!CHECK: %[[ACCESS_DESC_MEMBER_UB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ARRAY_MEMBER_DESC_ALLOCA]], i32 0, i32 7, i64 0, i32 1 +!CHECK: %[[LOAD_DESC_MEMBER_UB:.*]] = load i64, ptr %[[ACCESS_DESC_MEMBER_UB]], align 8 +!CHECK: %[[OFFSET_MEMBER_UB:.*]] = sub i64 %[[LOAD_DESC_MEMBER_UB]], 1 !CHECK: %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCA_MEMBER_ACCESS]], i32 0, i32 0 !CHECK: 
%[[DTYPE_BASE_ADDR_ACCESS_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA]], i32 0, i32 0 !CHECK: %[[DTYPE_BASE_ADDR_LOAD_2:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_2]], align 8 @@ -729,13 +729,12 @@ end subroutine mapType_common_block_members !CHECK: %[[ALLOCA_1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 !CHECK: %[[ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, align 8 !CHECK: %[[BASE_PTR_1:.*]] = alloca %_QFmaptype_nested_derived_type_member_idxTdtype, i64 1, align 8 -!CHECK: %{{.*}} = getelementptr %_QFmaptype_nested_derived_type_member_idxTdtype, ptr %[[BASE_PTR_1]], i32 0, i32 1 +!CHECK: %[[OFF_PTR_1:.*]] = getelementptr %_QFmaptype_nested_derived_type_member_idxTdtype, ptr %[[BASE_PTR_1]], i32 0, i32 1 !CHECK: %[[BOUNDS_ACC:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA]], i32 0, i32 7, i64 0, i32 1 !CHECK: %[[BOUNDS_LD:.*]] = load i64, ptr %[[BOUNDS_ACC]], align 8 !CHECK: %[[BOUNDS_ACC_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCA_1]], i32 0, i32 7, i64 0, i32 1 !CHECK: %[[BOUNDS_LD_2:.*]] = load i64, ptr %[[BOUNDS_ACC_2]], align 8 !CHECK: %[[BOUNDS_CALC:.*]] = sub i64 %[[BOUNDS_LD_2]], 1 -!CHECK: %[[OFF_PTR_1:.*]] = getelementptr %_QFmaptype_nested_derived_type_member_idxTdtype, ptr %[[BASE_PTR_1]], i32 0, i32 1 !CHECK: %[[OFF_PTR_CALC_0:.*]] = sub i64 %[[BOUNDS_LD]], 1 !CHECK: %[[OFF_PTR_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[OFF_PTR_1]], i32 0, i32 0 !CHECK: %[[GEP_DESC_PTR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA_0]], i32 0, i32 0 diff --git a/flang/test/Lower/CUDA/cuda-cdevloc.cuf b/flang/test/Lower/CUDA/cuda-cdevloc.cuf index a71490207909a..d663e6eda478b 100644 --- a/flang/test/Lower/CUDA/cuda-cdevloc.cuf +++ 
b/flang/test/Lower/CUDA/cuda-cdevloc.cuf @@ -12,10 +12,8 @@ end ! CHECK: %[[A1:.*]] = hlfir.designate %[[A]]#0 (%c1{{.*}}) : (!fir.ref>, index) -> !fir.ref ! CHECK: %[[BOX:.*]] = fir.embox %[[A1]] : (!fir.ref) -> !fir.box ! CHECK: %[[CDEVPTR:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> -! CHECK: %[[FIELD_CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> -! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], %[[FIELD_CPTR]] : (!fir.ref}>>, !fir.field) -> !fir.ref> -! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], %[[FIELD_ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], cptr : (!fir.ref}>>) -> !fir.ref> +! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box) -> !fir.ref ! CHECK: %[[ADDRESS_A1:.*]] = fir.convert %[[BOX_ADDR]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[ADDRESS_A1]] to %[[COORD_ADDRESS]] : !fir.ref diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index cbddcd79c6333..1c03a76cae76a 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -393,4 +393,4 @@ end subroutine ! CHECK: %[[ALLOC_TMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[TMP:.*]]:2 = hlfir.declare %[[ALLOC_TMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) ! CHECK: cuf.data_transfer %[[ADEV_DECL]]#1 to %[[TMP]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.heap> -! 
CHECL: hlfir.assign +! CHECK: hlfir.assign diff --git a/flang/test/Lower/CUDA/cuda-devptr.cuf b/flang/test/Lower/CUDA/cuda-devptr.cuf index 0a9087cf6c133..2d6af2a9693a4 100644 --- a/flang/test/Lower/CUDA/cuda-devptr.cuf +++ b/flang/test/Lower/CUDA/cuda-devptr.cuf @@ -48,10 +48,8 @@ end ! CHECK-LABEL: func.func @_QPsub2() ! CHECK: %[[X:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub2Ex"} : (!fir.ref>>>) -> !fir.ref>>> -! CHECK: %[[CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{{[<]?}}{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}{{[>]?}}> -! CHECK: %[[CPTR_COORD:.*]] = fir.coordinate_of %{{.*}}, %[[CPTR]] : (!fir.ref}{{[>]?}}>>, !fir.field) -> !fir.ref> -! CHECK: %[[ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[ADDRESS_COORD:.*]] = fir.coordinate_of %[[CPTR_COORD]], %[[ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[CPTR_COORD:.*]] = fir.coordinate_of %{{.*}}, cptr : (!fir.ref}{{[>]?}}>>) -> !fir.ref> +! CHECK: %[[ADDRESS_COORD:.*]] = fir.coordinate_of %[[CPTR_COORD]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDRESS_LOADED:.*]] = fir.load %[[ADDRESS_COORD]] : !fir.ref ! CHECK: %[[ADDRESS_IDX:.*]] = fir.convert %[[ADDRESS_LOADED]] : (i64) -> !fir.ptr> ! CHECK: %[[EMBOX:.*]] = fir.embox %[[ADDRESS_IDX]](%{{.*}}) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> @@ -68,14 +66,10 @@ end subroutine ! CHECK-LABEL: func.func @_QPassign_c_devptr ! CHECK: %[[P:.*]] = fir.declare %arg0 dummy_scope %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFassign_c_devptrEp"} ! CHECK: %[[C_DEVLOC_RES:.*]] = fir.declare %15 {uniq_name = ".tmp.intrinsic_result"} : (!fir.ref}>>) -> !fir.ref}>> -! CHECK: %[[CPTR_FIELD:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> -! 
CHECK: %[[RES_CPTR_COORD:.*]] = fir.coordinate_of %[[C_DEVLOC_RES]], %[[CPTR_FIELD]] : (!fir.ref}>>, !fir.field) -> !fir.ref> -! CHECK: %[[CPTR_FIELD:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> -! CHECK: %[[P_CPTR_COORD:.*]] = fir.coordinate_of %[[P]], %[[CPTR_FIELD]] : (!fir.ref}>>, !fir.field) -> !fir.ref> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[RES_ADDR_COORD:.*]] = fir.coordinate_of %[[RES_CPTR_COORD]], %[[ADDRESS_FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[P_ADDR_COORD:.*]] = fir.coordinate_of %[[P_CPTR_COORD]], %[[ADDRESS_FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[RES_CPTR_COORD:.*]] = fir.coordinate_of %[[C_DEVLOC_RES]], cptr : (!fir.ref}>>) -> !fir.ref> +! CHECK: %[[P_CPTR_COORD:.*]] = fir.coordinate_of %[[P]], cptr : (!fir.ref}>>) -> !fir.ref> +! CHECK: %[[RES_ADDR_COORD:.*]] = fir.coordinate_of %[[RES_CPTR_COORD]], __address : (!fir.ref>) -> !fir.ref +! CHECK: %[[P_ADDR_COORD:.*]] = fir.coordinate_of %[[P_CPTR_COORD]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDR:.*]] = fir.load %[[RES_ADDR_COORD]] : !fir.ref ! CHECK: fir.store %[[ADDR]] to %[[P_ADDR_COORD]] : !fir.ref diff --git a/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90 b/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90 index 6a44cbd86e80d..d55ebaaad99eb 100644 --- a/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90 +++ b/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90 @@ -346,8 +346,7 @@ subroutine c_loc_2(x) ! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope ! 
CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFc_loc_1Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_2]]#0 : (!fir.box>) -> !fir.ref> ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>) -> i64 ! CHECK: fir.store %[[VAL_7]] to %[[VAL_5]] : !fir.ref @@ -367,8 +366,7 @@ subroutine c_loc_2(x) ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFc_loc_2Ex"} : (!fir.ref>>>, !fir.dscope) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref>>> ! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_6:.*]] = fir.coordinate_of %[[VAL_4]], %[[VAL_5]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_6:.*]] = fir.coordinate_of %[[VAL_4]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_7:.*]] = fir.box_addr %[[VAL_3]] : (!fir.box>>) -> !fir.ptr> ! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.ptr>) -> i64 ! 
CHECK: fir.store %[[VAL_8]] to %[[VAL_6]] : !fir.ref diff --git a/flang/test/Lower/HLFIR/c_ptr-constant-init.f90 b/flang/test/Lower/HLFIR/c_ptr-constant-init.f90 index fcf2d1e31475d..1797d473fda0d 100644 --- a/flang/test/Lower/HLFIR/c_ptr-constant-init.f90 +++ b/flang/test/Lower/HLFIR/c_ptr-constant-init.f90 @@ -15,7 +15,6 @@ end subroutine test ! CHECK: %[[VAL_1:.*]] = fir.field_index d, !fir.type<_QFtestTt1{d:!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>}> ! CHECK: %[[VAL_2:.*]] = fir.undefined !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> ! CHECK: %[[VAL_3:.*]] = fir.undefined !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK: %[[VAL_5:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_6:.*]] = fir.insert_value %[[VAL_3]], %[[VAL_5]], ["__address", !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>] : (!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>, i64) -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_2]], %[[VAL_6]], [0 : index] : (!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> @@ -39,7 +38,6 @@ end subroutine test2 ! CHECK: %[[VAL_1:.*]] = fir.field_index d, !fir.type<_QFtest2Tt1{d:!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>>}> ! CHECK: %[[VAL_2:.*]] = fir.undefined !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>> ! CHECK: %[[VAL_3:.*]] = fir.undefined !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! 
CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! CHECK: %[[VAL_5:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_6:.*]] = fir.insert_value %[[VAL_3]], %[[VAL_5]], ["__address", !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>] : (!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>, i64) -> !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_2]], %[[VAL_6]], [0 : index] : (!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>>, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>) -> !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>> diff --git a/flang/test/Lower/HLFIR/intrinsic-module-procedures.f90 b/flang/test/Lower/HLFIR/intrinsic-module-procedures.f90 index 6406d00bebb59..8a5a52be68019 100644 --- a/flang/test/Lower/HLFIR/intrinsic-module-procedures.f90 +++ b/flang/test/Lower/HLFIR/intrinsic-module-procedures.f90 @@ -16,8 +16,7 @@ subroutine foo(cptr, x) ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare {{.*}}Ex" ! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#1 : (!fir.ref) -> !fir.box ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_5]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_5]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_8:.*]] = fir.box_addr %[[VAL_4]] : (!fir.box) -> !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (!fir.ref) -> i64 ! 
CHECK: fir.store %[[VAL_9]] to %[[VAL_7]] : !fir.ref diff --git a/flang/test/Lower/HLFIR/type-info-components.f90 b/flang/test/Lower/HLFIR/type-info-components.f90 index ee36f9cf6588f..9faf35656166e 100644 --- a/flang/test/Lower/HLFIR/type-info-components.f90 +++ b/flang/test/Lower/HLFIR/type-info-components.f90 @@ -17,7 +17,7 @@ subroutine test_1(x) type(sometype) :: x end subroutine ! CHECK-LABEL: fir.type_info @_QFtest_1Tsometype -! CHECK-SAME component_info { +! CHECK-SAME: component_info { ! CHECK: fir.dt_component "i" lbs [-1] init @_QFtest_1E.di.sometype.i ! CHECK-NOT: fir.dt_component "j" ! CHECK: fir.dt_component "p" init @_QFtest_1E.di.sometype.p @@ -35,7 +35,7 @@ subroutine test_nesting(x) type(sometype2) :: x end subroutine ! CHECK-LABEL: fir.type_info @_QFtest_nestingTsome_sub_type -! CHECK-SAME component_info { +! CHECK-SAME: component_info { ! CHECK: fir.dt_component "i" init @_QFtest_nestingE.di.some_sub_type.i ! CHECK: } @@ -50,6 +50,6 @@ subroutine data_like(x) type(sometype3) :: x end subroutine ! CHECK-LABEL: fir.type_info @_QFdata_likeTsometype3 -! CHECK-SAME component_info { +! CHECK-SAME: component_info { ! CHECK: fir.dt_component "i" init @_QFdata_likeE.di.sometype3.i ! CHECK: } diff --git a/flang/test/Lower/Intrinsics/c_associated.f90 b/flang/test/Lower/Intrinsics/c_associated.f90 index ba2d7f130f760..3956957853372 100644 --- a/flang/test/Lower/Intrinsics/c_associated.f90 +++ b/flang/test/Lower/Intrinsics/c_associated.f90 @@ -8,15 +8,13 @@ ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "cptr2"}) { ! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z1", uniq_name = "_QFtest_c_ptrEz1"} ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z2", uniq_name = "_QFtest_c_ptrEz2"} -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! 
CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_8:.*]] = arith.cmpi ne, %[[VAL_6]], %[[VAL_7]] : i64 ! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[VAL_9]] to %[[VAL_2]] : !fir.ref> -! CHECK: %[[VAL_10:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_10]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_12]], %[[VAL_13]] : i64 @@ -26,8 +24,7 @@ ! CHECK: %[[VAL_18:.*]] = fir.if %[[VAL_17]] -> (i1) { ! CHECK: fir.result %[[VAL_14]] : i1 ! CHECK: } else { -! CHECK: %[[VAL_19:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_20:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_19]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_20:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref ! CHECK: %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_12]], %[[VAL_21]] : i64 ! CHECK: %[[VAL_23:.*]] = arith.andi %[[VAL_14]], %[[VAL_22]] : i1 @@ -53,15 +50,13 @@ subroutine test_c_ptr(cptr1, cptr2) ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "cptr2"}) { ! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z1", uniq_name = "_QFtest_c_funptrEz1"} ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z2", uniq_name = "_QFtest_c_funptrEz2"} -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! 
CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_8:.*]] = arith.cmpi ne, %[[VAL_6]], %[[VAL_7]] : i64 ! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[VAL_9]] to %[[VAL_2]] : !fir.ref> -! CHECK: %[[VAL_10:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_10]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_12]], %[[VAL_13]] : i64 @@ -71,8 +66,7 @@ subroutine test_c_ptr(cptr1, cptr2) ! CHECK: %[[VAL_18:.*]] = fir.if %[[VAL_17]] -> (i1) { ! CHECK: fir.result %[[VAL_14]] : i1 ! CHECK: } else { -! CHECK: %[[VAL_19:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_20:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_19]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_20:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref ! CHECK: %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_12]], %[[VAL_21]] : i64 ! CHECK: %[[VAL_23:.*]] = arith.andi %[[VAL_14]], %[[VAL_22]] : i1 @@ -100,8 +94,7 @@ subroutine test_c_funptr(cptr1, cptr2) ! CHECK-SAME: %[[VAL_3:.*]]: !fir.ref> {fir.bindc_name = "cfunptr2", fir.optional}) { ! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z1", uniq_name = "_QFtest_optional_argumentEz1"} ! 
CHECK: %[[VAL_5:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z2", uniq_name = "_QFtest_optional_argumentEz2"} -! CHECK: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_9:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_10:.*]] = arith.cmpi ne, %[[VAL_8]], %[[VAL_9]] : i64 @@ -111,8 +104,7 @@ subroutine test_c_funptr(cptr1, cptr2) ! CHECK: %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (i1) { ! CHECK: fir.result %[[VAL_10]] : i1 ! CHECK: } else { -! CHECK: %[[VAL_15:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_15]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref ! CHECK: %[[VAL_18:.*]] = arith.cmpi eq, %[[VAL_8]], %[[VAL_17]] : i64 ! CHECK: %[[VAL_19:.*]] = arith.andi %[[VAL_10]], %[[VAL_18]] : i1 @@ -120,8 +112,7 @@ subroutine test_c_funptr(cptr1, cptr2) ! CHECK: } ! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_21:.*]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[VAL_20]] to %[[VAL_4]] : !fir.ref> -! CHECK: %[[VAL_22:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_23:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_22]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_23:.*]] = fir.coordinate_of %[[VAL_2]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref ! CHECK: %[[VAL_25:.*]] = arith.constant 0 : i64 ! 
CHECK: %[[VAL_26:.*]] = arith.cmpi ne, %[[VAL_24]], %[[VAL_25]] : i64 @@ -131,8 +122,7 @@ subroutine test_c_funptr(cptr1, cptr2) ! CHECK: %[[VAL_30:.*]] = fir.if %[[VAL_29]] -> (i1) { ! CHECK: fir.result %[[VAL_26]] : i1 ! CHECK: } else { -! CHECK: %[[VAL_31:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_32:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_31]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref ! CHECK: %[[VAL_34:.*]] = arith.cmpi eq, %[[VAL_24]], %[[VAL_33]] : i64 ! CHECK: %[[VAL_35:.*]] = arith.andi %[[VAL_26]], %[[VAL_34]] : i1 diff --git a/flang/test/Lower/Intrinsics/c_f_pointer.f90 b/flang/test/Lower/Intrinsics/c_f_pointer.f90 index 8e8680777275d..67817e39d5c2b 100644 --- a/flang/test/Lower/Intrinsics/c_f_pointer.f90 +++ b/flang/test/Lower/Intrinsics/c_f_pointer.f90 @@ -6,8 +6,7 @@ ! CHECK-LABEL: func.func @_QPtest_scalar( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "cptr"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>> {fir.bindc_name = "fptr"}) { -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i64) -> !fir.ptr ! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_5]] : (!fir.ptr) -> !fir.box> @@ -26,8 +25,7 @@ subroutine test_scalar(cptr, fptr) ! CHECK-LABEL: func.func @_QPtest_array( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "cptr"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>> {fir.bindc_name = "fptr"}) { -! 
CHECK: %[[VAL_65:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_66:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_65]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_66:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_67:.*]] = fir.load %[[VAL_66]] : !fir.ref ! CHECK: %[[VAL_68:.*]] = fir.convert %[[VAL_67]] : (i64) -> !fir.ptr> ! CHECK: %[[VAL_69:.*]] = arith.constant 0 : index @@ -56,8 +54,7 @@ subroutine test_array(cptr, fptr) ! CHECK-LABEL: func.func @_QPtest_char( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "cptr"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>> {fir.bindc_name = "fptr"}) { -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i64) -> !fir.ptr> ! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_5]] : (!fir.ptr>) -> !fir.box>> @@ -81,8 +78,7 @@ subroutine test_char(cptr, fptr) ! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_7]], %[[VAL_8]] : i32 ! CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_7]], %[[VAL_8]] : i32 -! CHECK: %[[VAL_70:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_71:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_70]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_71:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_72:.*]] = fir.load %[[VAL_71]] : !fir.ref ! CHECK: %[[VAL_73:.*]] = fir.convert %[[VAL_72]] : (i64) -> !fir.ptr>> ! 
CHECK: %[[VAL_74:.*]] = arith.constant 0 : index diff --git a/flang/test/Lower/Intrinsics/c_f_procpointer.f90 b/flang/test/Lower/Intrinsics/c_f_procpointer.f90 index f8792e4c1be0f..69f3f398cb12e 100644 --- a/flang/test/Lower/Intrinsics/c_f_procpointer.f90 +++ b/flang/test/Lower/Intrinsics/c_f_procpointer.f90 @@ -12,8 +12,7 @@ subroutine test_c_funloc(fptr, cptr) ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "cptr"}) { ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFtest_c_funlocEcptr"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_funlocEfptr"} : (!fir.ref ()>>, !fir.dscope) -> (!fir.ref ()>>, !fir.ref ()>>) -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_2]]#1, %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_2]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i64) -> (() -> ()) ! CHECK: %[[VAL_8:.*]] = fir.emboxproc %[[VAL_7]] : (() -> ()) -> !fir.boxproc<() -> ()> @@ -34,8 +33,7 @@ character(10) function char_func() ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "cptr"}) { ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFtest_c_funloc_charEcptr"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_funloc_charEfptr"} : (!fir.ref ()>>, !fir.dscope) -> (!fir.ref ()>>, !fir.ref ()>>) -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! 
CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_2]]#1, %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_2]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i64) -> (() -> ()) ! CHECK: %[[VAL_8:.*]] = fir.emboxproc %[[VAL_7]] : (() -> ()) -> !fir.boxproc<() -> ()> diff --git a/flang/test/Lower/Intrinsics/c_funloc-proc-pointers.f90 b/flang/test/Lower/Intrinsics/c_funloc-proc-pointers.f90 index 0f398a346d459..fbd196832ba65 100644 --- a/flang/test/Lower/Intrinsics/c_funloc-proc-pointers.f90 +++ b/flang/test/Lower/Intrinsics/c_funloc-proc-pointers.f90 @@ -11,8 +11,7 @@ subroutine test_c_funloc(p) ! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_funlocEp"} : (!fir.ref ()>>, !fir.dscope) -> (!fir.ref ()>>, !fir.ref ()>>) ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref ()>> ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_2]] : (!fir.boxproc<() -> ()>) -> (() -> ()) ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (() -> ()) -> i64 ! CHECK: fir.store %[[VAL_7]] to %[[VAL_5]] : !fir.ref @@ -31,8 +30,7 @@ character(10) function char_func() ! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_funloc_charEp"} : (!fir.ref ()>>, !fir.dscope) -> (!fir.ref ()>>, !fir.ref ()>>) ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref ()>> ! 
CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_2]] : (!fir.boxproc<() -> ()>) -> (() -> ()) ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (() -> ()) -> i64 ! CHECK: fir.store %[[VAL_7]] to %[[VAL_5]] : !fir.ref diff --git a/flang/test/Lower/Intrinsics/c_funloc.f90 b/flang/test/Lower/Intrinsics/c_funloc.f90 index 29a0e10e2b94f..93be2215ffef4 100644 --- a/flang/test/Lower/Intrinsics/c_funloc.f90 +++ b/flang/test/Lower/Intrinsics/c_funloc.f90 @@ -9,8 +9,7 @@ ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! CHECK-DAG: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.boxproc<(!fir.ref) -> ()>) -> ((!fir.ref) -> ()) ! CHECK-DAG: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : ((!fir.ref) -> ()) -> i64 -! CHECK-DAG: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_5]] to %[[VAL_7]] : !fir.ref subroutine test() diff --git a/flang/test/Lower/Intrinsics/c_loc.f90 b/flang/test/Lower/Intrinsics/c_loc.f90 index f46b80fd9b980..ecd5ce590fd5d 100644 --- a/flang/test/Lower/Intrinsics/c_loc.f90 +++ b/flang/test/Lower/Intrinsics/c_loc.f90 @@ -10,8 +10,7 @@ ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box) -> !fir.ref ! 
CHECK-DAG: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.ref) -> i64 -! CHECK-DAG: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_5]] to %[[VAL_7]] : !fir.ref ! CHECK: } @@ -29,8 +28,7 @@ subroutine c_loc_scalar() ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box>) -> !fir.ref> ! CHECK-DAG: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.ref>) -> i64 -! CHECK-DAG: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_5]] to %[[VAL_7]] : !fir.ref ! CHECK: } @@ -62,8 +60,7 @@ subroutine c_loc_char() ! CHECK: %[[VAL_17:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_18:.*]] = fir.box_addr %[[VAL_16]] : (!fir.box>) -> !fir.ref> ! CHECK-DAG: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (!fir.ref>) -> i64 -! CHECK-DAG: %[[VAL_20:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_17]], %[[VAL_20]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_17]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_19]] to %[[VAL_21]] : !fir.ref ! CHECK: } @@ -83,8 +80,7 @@ subroutine c_loc_substring() ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! 
CHECK-DAG: %[[VAL_6:.*]] = fir.box_addr %[[VAL_4]] : (!fir.box>) -> !fir.ref> ! CHECK-DAG: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>) -> i64 -! CHECK-DAG: %[[VAL_8:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], %[[VAL_8]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_7]] to %[[VAL_9]] : !fir.ref ! CHECK: } @@ -104,8 +100,7 @@ subroutine c_loc_array ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_6:.*]] = fir.box_addr %[[VAL_4]] : (!fir.box>>) -> !fir.ref>> ! CHECK-DAG: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>>) -> i64 -! CHECK-DAG: %[[VAL_8:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], %[[VAL_8]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_7]] to %[[VAL_9]] : !fir.ref ! CHECK: } @@ -127,8 +122,7 @@ subroutine c_loc_chararray() ! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_8:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box) -> !fir.ref ! CHECK-DAG: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (!fir.ref) -> i64 -! CHECK-DAG: %[[VAL_10:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_7]], %[[VAL_10]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_7]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_9]] to %[[VAL_11]] : !fir.ref ! CHECK: } @@ -158,8 +152,7 @@ subroutine c_loc_arrayelement() ! 
CHECK: %[[VAL_15:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_16:.*]] = fir.box_addr %[[VAL_14]] : (!fir.box>) -> !fir.ref> ! CHECK-DAG: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (!fir.ref>) -> i64 -! CHECK-DAG: %[[VAL_18:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_19:.*]] = fir.coordinate_of %[[VAL_15]], %[[VAL_18]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_19:.*]] = fir.coordinate_of %[[VAL_15]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_17]] to %[[VAL_19]] : !fir.ref ! CHECK: } @@ -196,15 +189,12 @@ subroutine c_loc_arraysection() ! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_1:.*]] : !fir.ref> ! CHECK: %[[VAL_15:.*]] = fir.embox %[[VAL_14:.*]] : (!fir.ptr) -> !fir.box ! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_17:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_16:.*]], %[[VAL_17:.*]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_16:.*]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.box_addr %[[VAL_15:.*]] : (!fir.box) -> !fir.ref ! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19:.*]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[VAL_20:.*]] to %[[VAL_18:.*]] : !fir.ref -! CHECK: %[[VAL_21:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_22:.*]] = fir.coordinate_of %[[VAL_16:.*]], %[[VAL_21:.*]] : (!fir.ref>, !fir.field) -> !fir.ref -! CHECK: %[[VAL_23:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_3:.*]], %[[VAL_23:.*]] : (!fir.ref>, !fir.field) -> !fir.ref +! 
CHECK: %[[VAL_22:.*]] = fir.coordinate_of %[[VAL_16:.*]], __address : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_3:.*]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_22:.*]] : !fir.ref ! CHECK: fir.store %[[VAL_25:.*]] to %[[VAL_24:.*]] : !fir.ref ! CHECK: return @@ -227,8 +217,7 @@ subroutine c_loc_non_save_pointer_scalar() ! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_11:.*]] = fir.box_addr %[[VAL_9]] : (!fir.box) -> !fir.ref ! CHECK-DAG: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.ref) -> i64 -! CHECK-DAG: %[[VAL_13:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_10]], %[[VAL_13]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_10]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_12]] to %[[VAL_14]] : !fir.ref ! CHECK: } @@ -247,8 +236,7 @@ subroutine c_loc_save_pointer_scalar() ! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_10:.*]] = fir.box_addr %[[VAL_8:.*]] : (!fir.box>) -> !fir.ref> ! CHECK-DAG: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (!fir.ref>) -> i64 -! CHECK-DAG: %[[VAL_12:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_13:.*]] = fir.coordinate_of %[[VAL_9]], %[[VAL_12]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_13:.*]] = fir.coordinate_of %[[VAL_9]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_11]] to %[[VAL_13]] : !fir.ref ! CHECK: } @@ -268,8 +256,7 @@ subroutine c_loc_derived_type ! CHECK: %[[VAL_31:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! 
CHECK-DAG: %[[VAL_32:.*]] = fir.box_addr %[[VAL_30:.*]] : (!fir.box>>) -> !fir.ptr> ! CHECK-DAG: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.ptr>) -> i64 -! CHECK-DAG: %[[VAL_34:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_31]], %[[VAL_34]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_31]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_33]] to %[[VAL_35]] : !fir.ref ! CHECK: } diff --git a/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90 b/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90 index c6a2f186e4c12..b304ee924ec57 100644 --- a/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90 +++ b/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90 @@ -14,11 +14,9 @@ function test_c_ptr_eq(ptr1, ptr2) ! CHECK: %[[DECL_ARG1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_ptr_eqEptr2"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test_c_ptr_eq", uniq_name = "_QFtest_c_ptr_eqEtest_c_ptr_eq"} ! CHECK: %[[DECL_RET:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFtest_c_ptr_eqEtest_c_ptr_eq"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[COORD_ADDRESS0:.*]] = fir.coordinate_of %[[DECL_ARG0]]#1, %[[FIELD_ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_ADDRESS0:.*]] = fir.coordinate_of %[[DECL_ARG0]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDRESS0:.*]] = fir.load %[[COORD_ADDRESS0]] : !fir.ref -! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! 
CHECK: %[[COORD_ADDRESS1:.*]] = fir.coordinate_of %[[DECL_ARG1]]#1, %[[FIELD_ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_ADDRESS1:.*]] = fir.coordinate_of %[[DECL_ARG1]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDRESS1:.*]] = fir.load %[[COORD_ADDRESS1]] : !fir.ref ! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[ADDRESS0]], %[[ADDRESS1]] : i64 ! CHECK: %[[RES:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4> @@ -41,11 +39,9 @@ function test_c_ptr_ne(ptr1, ptr2) ! CHECK: %[[DECL_ARG1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_ptr_neEptr2"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test_c_ptr_ne", uniq_name = "_QFtest_c_ptr_neEtest_c_ptr_ne"} ! CHECK: %[[DECL_RET:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFtest_c_ptr_neEtest_c_ptr_ne"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[COORD_ADDRESS0:.*]] = fir.coordinate_of %[[DECL_ARG0]]#1, %[[FIELD_ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_ADDRESS0:.*]] = fir.coordinate_of %[[DECL_ARG0]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDRESS0:.*]] = fir.load %[[COORD_ADDRESS0]] : !fir.ref -! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[COORD_ADDRESS1:.*]] = fir.coordinate_of %[[DECL_ARG1]]#1, %[[FIELD_ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_ADDRESS1:.*]] = fir.coordinate_of %[[DECL_ARG1]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDRESS1:.*]] = fir.load %[[COORD_ADDRESS1]] : !fir.ref ! CHECK: %[[CMP:.*]] = arith.cmpi ne, %[[ADDRESS0]], %[[ADDRESS1]] : i64 ! 
CHECK: %[[RES:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4> diff --git a/flang/test/Lower/Intrinsics/ieee_class.f90 b/flang/test/Lower/Intrinsics/ieee_class.f90 index 2c1cdf95275c9..acef959656539 100644 --- a/flang/test/Lower/Intrinsics/ieee_class.f90 +++ b/flang/test/Lower/Intrinsics/ieee_class.f90 @@ -65,23 +65,18 @@ subroutine classify(x) ! CHECK: %[[V_25:[0-9]+]] = arith.ori %[[V_22]], %[[V_24]] : i64 ! CHECK: %[[V_26:[0-9]+]] = fir.address_of(@_FortranAIeeeClassTable) : !fir.ref> ! CHECK: %[[V_27:[0-9]+]] = fir.coordinate_of %[[V_26]], %[[V_25]] : (!fir.ref>, i64) -> !fir.ref> - ! CHECK: %[[V_28:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_29:[0-9]+]] = fir.coordinate_of %[[V_27]], %[[V_28]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: %[[V_30:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_31:[0-9]+]] = fir.coordinate_of %[[V_2]], %[[V_30]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_29:[0-9]+]] = fir.coordinate_of %[[V_27]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref + ! CHECK: %[[V_31:[0-9]+]] = fir.coordinate_of %[[V_2]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_32:[0-9]+]] = fir.load %[[V_29]] : !fir.ref ! CHECK: fir.store %[[V_32]] to %[[V_31]] : !fir.ref r = ieee_class(x) ! if (r==ieee_signaling_nan) call out(x, 1) ! if (r==ieee_quiet_nan) call out(x, 2) - ! CHECK: %[[V_38:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_39:[0-9]+]] = fir.coordinate_of %[[V_1]], %[[V_38]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: %[[V_39:[0-9]+]] = fir.coordinate_of %[[V_1]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c3{{.*}} to %[[V_39]] : !fir.ref - ! CHECK: %[[V_40:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_41:[0-9]+]] = fir.coordinate_of %[[V_2]], %[[V_40]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: %[[V_42:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_43:[0-9]+]] = fir.coordinate_of %[[V_1]], %[[V_42]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_41:[0-9]+]] = fir.coordinate_of %[[V_2]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref + ! CHECK: %[[V_43:[0-9]+]] = fir.coordinate_of %[[V_1]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_44:[0-9]+]] = fir.load %[[V_41]] : !fir.ref ! CHECK: %[[V_45:[0-9]+]] = fir.load %[[V_43]] : !fir.ref ! CHECK: %[[V_46:[0-9]+]] = arith.cmpi eq, %[[V_44]], %[[V_45]] : i8 @@ -111,11 +106,9 @@ program p ! x(2) = ieee_value(x(1), ieee_quiet_nan) ! CHECK: %[[V_0:[0-9]+]] = fir.alloca !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> ! CHECK: %[[V_2:[0-9]+]] = fir.address_of(@_QFEx) : !fir.ref> - ! CHECK: %[[V_8:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_9:[0-9]+]] = fir.coordinate_of %[[V_0]], %[[V_8]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_9:[0-9]+]] = fir.coordinate_of %[[V_0]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c3{{.*}} to %[[V_9]] : !fir.ref - ! 
CHECK: %[[V_10:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_11:[0-9]+]] = fir.coordinate_of %[[V_0]], %[[V_10]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_11:[0-9]+]] = fir.coordinate_of %[[V_0]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_12:[0-9]+]] = fir.load %[[V_11]] : !fir.ref ! CHECK: %[[V_13:[0-9]+]] = fir.address_of(@_FortranAIeeeValueTable_8) : !fir.ref> ! CHECK: %[[V_14:[0-9]+]] = fir.coordinate_of %[[V_13]], %[[V_12]] : (!fir.ref>, i8) -> !fir.ref diff --git a/flang/test/Lower/Intrinsics/ieee_flag.f90 b/flang/test/Lower/Intrinsics/ieee_flag.f90 index e4addc0d658dc..13ca7ba48a74c 100644 --- a/flang/test/Lower/Intrinsics/ieee_flag.f90 +++ b/flang/test/Lower/Intrinsics/ieee_flag.f90 @@ -33,8 +33,7 @@ ! CHECK: %[[V_80:[0-9]+]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0) : !fir.ref> ! CHECK: %[[V_95:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_82:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_flag_type.flag, !fir.type<_QM__fortran_builtinsT__builtin_ieee_flag_type{_QM__fortran_builtinsT__builtin_ieee_flag_type.flag:i8}> - ! CHECK: %[[V_96:[0-9]+]] = fir.coordinate_of %[[V_95]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_96:[0-9]+]] = fir.coordinate_of %[[V_95]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_97:[0-9]+]] = fir.load %[[V_96]] : !fir.ref ! CHECK: %[[V_98:[0-9]+]] = fir.convert %[[V_97]] : (i8) -> i32 ! CHECK: %[[V_99:[0-9]+]] = fir.call @_FortranAMapException(%[[V_98]]) fastmath : (i32) -> i32 @@ -46,7 +45,7 @@ call ieee_set_flag(ieee_invalid, .false.) ! 
CHECK: %[[V_100:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_101:[0-9]+]] = fir.coordinate_of %[[V_100]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_101:[0-9]+]] = fir.coordinate_of %[[V_100]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_102:[0-9]+]] = fir.load %[[V_101]] : !fir.ref ! CHECK: %[[V_103:[0-9]+]] = fir.convert %[[V_102]] : (i8) -> i32 ! CHECK: %[[V_104:[0-9]+]] = fir.call @_FortranAMapException(%[[V_103]]) fastmath : (i32) -> i32 @@ -60,7 +59,7 @@ print*, 'invalid[F]: ', v ! CHECK: %[[V_118:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_119:[0-9]+]] = fir.coordinate_of %[[V_118]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_119:[0-9]+]] = fir.coordinate_of %[[V_118]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_120:[0-9]+]] = fir.load %[[V_119]] : !fir.ref ! CHECK: %[[V_121:[0-9]+]] = fir.convert %[[V_120]] : (i8) -> i32 ! CHECK: %[[V_122:[0-9]+]] = fir.call @_FortranAMapException(%[[V_121]]) fastmath : (i32) -> i32 @@ -72,7 +71,7 @@ call ieee_set_flag(ieee_invalid, .true.) ! CHECK: %[[V_123:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_124:[0-9]+]] = fir.coordinate_of %[[V_123]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_124:[0-9]+]] = fir.coordinate_of %[[V_123]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_125:[0-9]+]] = fir.load %[[V_124]] : !fir.ref ! CHECK: %[[V_126:[0-9]+]] = fir.convert %[[V_125]] : (i8) -> i32 ! 
CHECK: %[[V_127:[0-9]+]] = fir.call @_FortranAMapException(%[[V_126]]) fastmath : (i32) -> i32 @@ -89,7 +88,7 @@ ! CHECK: %[[V_141:[0-9]+]] = fir.declare %[[V_140]](%[[V_59]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.2x_QM__fortran_builtinsT__builtin_ieee_flag_type.1"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_141]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -106,7 +105,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_143]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_60]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.call @_FortranAMapException(%[[V_314]]) fastmath : (i32) -> i32 @@ -127,7 +126,7 @@ ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_154]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! 
CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_156]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref> - ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.load %[[V_313]] : !fir.ref ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_314]] : (i8) -> i32 ! CHECK: %[[V_316:[0-9]+]] = fir.call @_FortranAMapException(%[[V_315]]) fastmath : (i32) -> i32 @@ -144,7 +143,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_157]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_60]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.call @_FortranAMapException(%[[V_314]]) fastmath : (i32) -> i32 @@ -162,7 +161,7 @@ ! CHECK: %[[V_166:[0-9]+]] = fir.declare %[[V_165]](%[[V_54]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.3x_QM__fortran_builtinsT__builtin_ieee_flag_type.4"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_166]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -178,7 +177,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_167]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_64]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.call @_FortranAMapException(%[[V_314]]) fastmath : (i32) -> i32 @@ -199,7 +198,7 @@ ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_178]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_180]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref> - ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.load %[[V_313]] : !fir.ref ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_314]] : (i8) -> i32 ! CHECK: %[[V_316:[0-9]+]] = fir.call @_FortranAMapException(%[[V_315]]) fastmath : (i32) -> i32 @@ -216,7 +215,7 @@ ! 
CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_181]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_64]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.call @_FortranAMapException(%[[V_314]]) fastmath : (i32) -> i32 @@ -234,7 +233,7 @@ ! CHECK: %[[V_190:[0-9]+]] = fir.declare %[[V_189]](%[[V_1]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.5x_QM__fortran_builtinsT__builtin_ieee_flag_type.6"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c5{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_190]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -250,7 +249,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c5{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_191]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! 
CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_62]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.call @_FortranAMapException(%[[V_314]]) fastmath : (i32) -> i32 @@ -275,7 +274,7 @@ print*, 'support invalid: ', ieee_support_halting(ieee_invalid) ! CHECK: %[[V_222:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_223:[0-9]+]] = fir.coordinate_of %[[V_222]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_223:[0-9]+]] = fir.coordinate_of %[[V_222]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_224:[0-9]+]] = fir.load %[[V_223]] : !fir.ref ! CHECK: %[[V_225:[0-9]+]] = fir.convert %[[V_224]] : (i8) -> i32 ! CHECK: %[[V_226:[0-9]+]] = fir.call @_FortranAMapException(%[[V_225]]) fastmath : (i32) -> i32 @@ -287,7 +286,7 @@ call ieee_set_halting_mode(ieee_invalid, .false.) ! CHECK: %[[V_227:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_228:[0-9]+]] = fir.coordinate_of %[[V_227]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_228:[0-9]+]] = fir.coordinate_of %[[V_227]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_229:[0-9]+]] = fir.load %[[V_228]] : !fir.ref ! CHECK: %[[V_230:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! 
CHECK: %[[V_231:[0-9]+]] = fir.convert %[[V_229]] : (i8) -> i32 @@ -302,7 +301,7 @@ print*, 'invalid[F]: ', v ! CHECK: %[[V_244:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_245:[0-9]+]] = fir.coordinate_of %[[V_244]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_245:[0-9]+]] = fir.coordinate_of %[[V_244]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_246:[0-9]+]] = fir.load %[[V_245]] : !fir.ref ! CHECK: %[[V_247:[0-9]+]] = fir.convert %[[V_246]] : (i8) -> i32 ! CHECK: %[[V_248:[0-9]+]] = fir.call @_FortranAMapException(%[[V_247]]) fastmath : (i32) -> i32 @@ -314,7 +313,7 @@ call ieee_set_halting_mode(ieee_invalid, .true.) ! CHECK: %[[V_249:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_250:[0-9]+]] = fir.coordinate_of %[[V_249]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_250:[0-9]+]] = fir.coordinate_of %[[V_249]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_251:[0-9]+]] = fir.load %[[V_250]] : !fir.ref ! CHECK: %[[V_252:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! CHECK: %[[V_253:[0-9]+]] = fir.convert %[[V_251]] : (i8) -> i32 @@ -331,7 +330,7 @@ ! CHECK: %[[V_266:[0-9]+]] = fir.declare %[[V_140]](%[[V_59]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.2x_QM__fortran_builtinsT__builtin_ieee_flag_type.1"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_266]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -347,7 +346,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_267]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_60]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 @@ -368,7 +367,7 @@ ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_274]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_275]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref> - ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.load %[[V_313]] : !fir.ref ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_314]] : (i8) -> i32 ! CHECK: %[[V_316:[0-9]+]] = fir.call @_FortranAMapException(%[[V_315]]) fastmath : (i32) -> i32 @@ -385,7 +384,7 @@ ! 
CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_276]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_60]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 @@ -403,7 +402,7 @@ ! CHECK: %[[V_283:[0-9]+]] = fir.declare %[[V_165]](%[[V_54]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.3x_QM__fortran_builtinsT__builtin_ieee_flag_type.4"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_283]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -419,7 +418,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_284]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_64]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! 
CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 @@ -440,7 +439,7 @@ ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_291]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_292]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref> - ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.load %[[V_313]] : !fir.ref ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_314]] : (i8) -> i32 ! CHECK: %[[V_316:[0-9]+]] = fir.call @_FortranAMapException(%[[V_315]]) fastmath : (i32) -> i32 @@ -457,7 +456,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_293]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_64]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! 
CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 @@ -475,7 +474,7 @@ ! CHECK: %[[V_300:[0-9]+]] = fir.declare %[[V_189]](%[[V_1]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.5x_QM__fortran_builtinsT__builtin_ieee_flag_type.6"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c5{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_300]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -491,7 +490,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c5{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_301]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_62]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! 
CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 diff --git a/flang/test/Lower/Intrinsics/ieee_logb.f90 b/flang/test/Lower/Intrinsics/ieee_logb.f90 index bbc65e68e0b46..d9252e22a5f9f 100644 --- a/flang/test/Lower/Intrinsics/ieee_logb.f90 +++ b/flang/test/Lower/Intrinsics/ieee_logb.f90 @@ -15,8 +15,7 @@ subroutine out(x) ! CHECK: %[[V_65:[0-9]+]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0) : !fir.ref> ! CHECK: %[[V_66:[0-9]+]] = fir.declare %[[V_65]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_67:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_flag_type.flag, !fir.type<_QM__fortran_builtinsT__builtin_ieee_flag_type{_QM__fortran_builtinsT__builtin_ieee_flag_type.flag:i8}> - ! CHECK: %[[V_68:[0-9]+]] = fir.coordinate_of %[[V_66]], %[[V_67]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_68:[0-9]+]] = fir.coordinate_of %[[V_66]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_69:[0-9]+]] = fir.load %[[V_68]] : !fir.ref ! CHECK: %[[V_70:[0-9]+]] = fir.convert %[[V_69]] : (i8) -> i32 ! CHECK: %[[V_71:[0-9]+]] = fir.call @_FortranAMapException(%[[V_70]]) fastmath : (i32) -> i32 @@ -53,7 +52,7 @@ subroutine out(x) r = ieee_logb(x) ! CHECK: %[[V_76:[0-9]+]] = fir.declare %[[V_65]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_77:[0-9]+]] = fir.coordinate_of %[[V_76]], %[[V_67]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_77:[0-9]+]] = fir.coordinate_of %[[V_76]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_78:[0-9]+]] = fir.load %[[V_77]] : !fir.ref ! CHECK: %[[V_79:[0-9]+]] = fir.convert %[[V_78]] : (i8) -> i32 ! 
CHECK: %[[V_80:[0-9]+]] = fir.call @_FortranAMapException(%[[V_79]]) fastmath : (i32) -> i32 diff --git a/flang/test/Lower/Intrinsics/ieee_max_min.f90 b/flang/test/Lower/Intrinsics/ieee_max_min.f90 index 69ae05b8f2f8c..581f3d6c7f52c 100644 --- a/flang/test/Lower/Intrinsics/ieee_max_min.f90 +++ b/flang/test/Lower/Intrinsics/ieee_max_min.f90 @@ -69,8 +69,7 @@ program p ! CHECK: %[[V_201:[0-9]+]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10) : !fir.ref> ! CHECK: %[[V_202:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_203:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_flag_type.flag, !fir.type<_QM__fortran_builtinsT__builtin_ieee_flag_type{_QM__fortran_builtinsT__builtin_ieee_flag_type.flag:i8}> - ! CHECK: %[[V_204:[0-9]+]] = fir.coordinate_of %[[V_202]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_204:[0-9]+]] = fir.coordinate_of %[[V_202]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_205:[0-9]+]] = fir.load %[[V_204]] : !fir.ref ! CHECK: %[[V_206:[0-9]+]] = fir.convert %[[V_205]] : (i8) -> i32 ! CHECK: %[[V_207:[0-9]+]] = fir.call @_FortranAMapException(%[[V_206]]) fastmath : (i32) -> i32 @@ -113,7 +112,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_211]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_212:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_213:[0-9]+]] = fir.coordinate_of %[[V_212]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_213:[0-9]+]] = fir.coordinate_of %[[V_212]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_214:[0-9]+]] = fir.load %[[V_213]] : !fir.ref ! 
CHECK: %[[V_215:[0-9]+]] = fir.convert %[[V_214]] : (i8) -> i32 ! CHECK: %[[V_216:[0-9]+]] = fir.call @_FortranAMapException(%[[V_215]]) fastmath : (i32) -> i32 @@ -127,7 +126,7 @@ program p write(*, 4) 'max ', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_268:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_269:[0-9]+]] = fir.coordinate_of %[[V_268]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_269:[0-9]+]] = fir.coordinate_of %[[V_268]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_270:[0-9]+]] = fir.load %[[V_269]] : !fir.ref ! CHECK: %[[V_271:[0-9]+]] = fir.convert %[[V_270]] : (i8) -> i32 ! CHECK: %[[V_272:[0-9]+]] = fir.call @_FortranAMapException(%[[V_271]]) fastmath : (i32) -> i32 @@ -172,7 +171,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_278]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_279:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_280:[0-9]+]] = fir.coordinate_of %[[V_279]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_280:[0-9]+]] = fir.coordinate_of %[[V_279]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_281:[0-9]+]] = fir.load %[[V_280]] : !fir.ref ! CHECK: %[[V_282:[0-9]+]] = fir.convert %[[V_281]] : (i8) -> i32 ! CHECK: %[[V_283:[0-9]+]] = fir.call @_FortranAMapException(%[[V_282]]) fastmath : (i32) -> i32 @@ -186,7 +185,7 @@ program p write(*, 4) 'mag ', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_329:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! 
CHECK: %[[V_330:[0-9]+]] = fir.coordinate_of %[[V_329]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_330:[0-9]+]] = fir.coordinate_of %[[V_329]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_331:[0-9]+]] = fir.load %[[V_330]] : !fir.ref ! CHECK: %[[V_332:[0-9]+]] = fir.convert %[[V_331]] : (i8) -> i32 ! CHECK: %[[V_333:[0-9]+]] = fir.call @_FortranAMapException(%[[V_332]]) fastmath : (i32) -> i32 @@ -233,7 +232,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_337]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_338:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_339:[0-9]+]] = fir.coordinate_of %[[V_338]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_339:[0-9]+]] = fir.coordinate_of %[[V_338]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_340:[0-9]+]] = fir.load %[[V_339]] : !fir.ref ! CHECK: %[[V_341:[0-9]+]] = fir.convert %[[V_340]] : (i8) -> i32 ! CHECK: %[[V_342:[0-9]+]] = fir.call @_FortranAMapException(%[[V_341]]) fastmath : (i32) -> i32 @@ -247,7 +246,7 @@ program p write(*, 4) 'max_num', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_388:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_389:[0-9]+]] = fir.coordinate_of %[[V_388]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_389:[0-9]+]] = fir.coordinate_of %[[V_388]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_390:[0-9]+]] = fir.load %[[V_389]] : !fir.ref ! CHECK: %[[V_391:[0-9]+]] = fir.convert %[[V_390]] : (i8) -> i32 ! CHECK: %[[V_392:[0-9]+]] = fir.call @_FortranAMapException(%[[V_391]]) fastmath : (i32) -> i32 @@ -296,7 +295,7 @@ program p ! 
CHECK: } ! CHECK: fir.store %[[V_398]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_399:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_400:[0-9]+]] = fir.coordinate_of %[[V_399]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_400:[0-9]+]] = fir.coordinate_of %[[V_399]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_401:[0-9]+]] = fir.load %[[V_400]] : !fir.ref ! CHECK: %[[V_402:[0-9]+]] = fir.convert %[[V_401]] : (i8) -> i32 ! CHECK: %[[V_403:[0-9]+]] = fir.call @_FortranAMapException(%[[V_402]]) fastmath : (i32) -> i32 @@ -310,7 +309,7 @@ program p write(*, 4) 'mag_num', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_449:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_450:[0-9]+]] = fir.coordinate_of %[[V_449]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_450:[0-9]+]] = fir.coordinate_of %[[V_449]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_451:[0-9]+]] = fir.load %[[V_450]] : !fir.ref ! CHECK: %[[V_452:[0-9]+]] = fir.convert %[[V_451]] : (i8) -> i32 ! CHECK: %[[V_453:[0-9]+]] = fir.call @_FortranAMapException(%[[V_452]]) fastmath : (i32) -> i32 @@ -353,7 +352,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_457]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_458:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_459:[0-9]+]] = fir.coordinate_of %[[V_458]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_459:[0-9]+]] = fir.coordinate_of %[[V_458]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! 
CHECK: %[[V_460:[0-9]+]] = fir.load %[[V_459]] : !fir.ref ! CHECK: %[[V_461:[0-9]+]] = fir.convert %[[V_460]] : (i8) -> i32 ! CHECK: %[[V_462:[0-9]+]] = fir.call @_FortranAMapException(%[[V_461]]) fastmath : (i32) -> i32 @@ -367,7 +366,7 @@ program p write(*, 4) 'min ', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_508:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_509:[0-9]+]] = fir.coordinate_of %[[V_508]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_509:[0-9]+]] = fir.coordinate_of %[[V_508]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_510:[0-9]+]] = fir.load %[[V_509]] : !fir.ref ! CHECK: %[[V_511:[0-9]+]] = fir.convert %[[V_510]] : (i8) -> i32 ! CHECK: %[[V_512:[0-9]+]] = fir.call @_FortranAMapException(%[[V_511]]) fastmath : (i32) -> i32 @@ -412,7 +411,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_518]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_519:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_520:[0-9]+]] = fir.coordinate_of %[[V_519]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_520:[0-9]+]] = fir.coordinate_of %[[V_519]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_521:[0-9]+]] = fir.load %[[V_520]] : !fir.ref ! CHECK: %[[V_522:[0-9]+]] = fir.convert %[[V_521]] : (i8) -> i32 ! CHECK: %[[V_523:[0-9]+]] = fir.call @_FortranAMapException(%[[V_522]]) fastmath : (i32) -> i32 @@ -426,7 +425,7 @@ program p write(*, 4) 'mig ', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_569:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! 
CHECK: %[[V_570:[0-9]+]] = fir.coordinate_of %[[V_569]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_570:[0-9]+]] = fir.coordinate_of %[[V_569]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_571:[0-9]+]] = fir.load %[[V_570]] : !fir.ref ! CHECK: %[[V_572:[0-9]+]] = fir.convert %[[V_571]] : (i8) -> i32 ! CHECK: %[[V_573:[0-9]+]] = fir.call @_FortranAMapException(%[[V_572]]) fastmath : (i32) -> i32 @@ -473,7 +472,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_577]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_578:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_579:[0-9]+]] = fir.coordinate_of %[[V_578]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_579:[0-9]+]] = fir.coordinate_of %[[V_578]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_580:[0-9]+]] = fir.load %[[V_579]] : !fir.ref ! CHECK: %[[V_581:[0-9]+]] = fir.convert %[[V_580]] : (i8) -> i32 ! CHECK: %[[V_582:[0-9]+]] = fir.call @_FortranAMapException(%[[V_581]]) fastmath : (i32) -> i32 @@ -487,7 +486,7 @@ program p write(*, 4) 'min_num', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_628:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_629:[0-9]+]] = fir.coordinate_of %[[V_628]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_629:[0-9]+]] = fir.coordinate_of %[[V_628]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_630:[0-9]+]] = fir.load %[[V_629]] : !fir.ref ! CHECK: %[[V_631:[0-9]+]] = fir.convert %[[V_630]] : (i8) -> i32 ! CHECK: %[[V_632:[0-9]+]] = fir.call @_FortranAMapException(%[[V_631]]) fastmath : (i32) -> i32 @@ -536,7 +535,7 @@ program p ! 
CHECK: } ! CHECK: fir.store %[[V_638]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_639:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_640:[0-9]+]] = fir.coordinate_of %[[V_639]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_640:[0-9]+]] = fir.coordinate_of %[[V_639]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_641:[0-9]+]] = fir.load %[[V_640]] : !fir.ref ! CHECK: %[[V_642:[0-9]+]] = fir.convert %[[V_641]] : (i8) -> i32 ! CHECK: %[[V_643:[0-9]+]] = fir.call @_FortranAMapException(%[[V_642]]) fastmath : (i32) -> i32 diff --git a/flang/test/Lower/Intrinsics/ieee_operator_eq.f90 b/flang/test/Lower/Intrinsics/ieee_operator_eq.f90 index d2067602babb3..8f77460a010fd 100644 --- a/flang/test/Lower/Intrinsics/ieee_operator_eq.f90 +++ b/flang/test/Lower/Intrinsics/ieee_operator_eq.f90 @@ -4,10 +4,8 @@ subroutine s(r1,r2) use ieee_arithmetic, only: ieee_round_type, operator(==) type(ieee_round_type) :: r1, r2 - ! CHECK: %[[V_3:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_4:[0-9]+]] = fir.coordinate_of %arg0, %[[V_3]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: %[[V_5:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_6:[0-9]+]] = fir.coordinate_of %arg1, %[[V_5]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_4:[0-9]+]] = fir.coordinate_of %arg0, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref + ! 
CHECK: %[[V_6:[0-9]+]] = fir.coordinate_of %arg1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_7:[0-9]+]] = fir.load %[[V_4]] : !fir.ref ! CHECK: %[[V_8:[0-9]+]] = fir.load %[[V_6]] : !fir.ref ! CHECK: %[[V_9:[0-9]+]] = arith.cmpi eq, %[[V_7]], %[[V_8]] : i8 @@ -30,20 +28,16 @@ subroutine s(r1,r2) ! CHECK: %[[V_1:[0-9]+]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> ! CHECK: %[[V_2:[0-9]+]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> ! CHECK: %[[V_3:[0-9]+]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_9:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_10:[0-9]+]] = fir.coordinate_of %[[V_3]], %[[V_9]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_10:[0-9]+]] = fir.coordinate_of %[[V_3]], _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c0{{.*}} to %[[V_10]] : !fir.ref - ! CHECK: %[[V_16:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_17:[0-9]+]] = fir.coordinate_of %[[V_2]], %[[V_16]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_17:[0-9]+]] = fir.coordinate_of %[[V_2]], _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c1{{.*}} to %[[V_17]] : !fir.ref ! CHECK: fir.call @_QPs(%[[V_3]], %[[V_2]]) {{.*}} : (!fir.ref>, !fir.ref>) -> () call s(ieee_to_zero, ieee_nearest) - ! 
CHECK: %[[V_23:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_24:[0-9]+]] = fir.coordinate_of %[[V_1]], %[[V_23]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_24:[0-9]+]] = fir.coordinate_of %[[V_1]], _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c1{{.*}} to %[[V_24]] : !fir.ref - ! CHECK: %[[V_30:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_31:[0-9]+]] = fir.coordinate_of %[[V_0]], %[[V_30]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_31:[0-9]+]] = fir.coordinate_of %[[V_0]], _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c1{{.*}} to %[[V_31]] : !fir.ref ! CHECK: fir.call @_QPs(%[[V_1]], %[[V_0]]) {{.*}} : (!fir.ref>, !fir.ref>) -> () call s(ieee_nearest, ieee_nearest) diff --git a/flang/test/Lower/Intrinsics/ieee_rint_int.f90 b/flang/test/Lower/Intrinsics/ieee_rint_int.f90 index 86a4aff5005bc..be3a3b92a4584 100644 --- a/flang/test/Lower/Intrinsics/ieee_rint_int.f90 +++ b/flang/test/Lower/Intrinsics/ieee_rint_int.f90 @@ -43,8 +43,7 @@ program p ! CHECK: %[[V_35:[0-9]+]]:2 = hlfir.declare %[[V_34]] ! CHECK: %[[V_36:[0-9]+]] = fir.load %[[V_19]]#0 : !fir.ref ! CHECK: %[[V_37:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_38:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_39:[0-9]+]] = fir.coordinate_of %[[V_35]]#1, %[[V_38]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: %[[V_39:[0-9]+]] = fir.coordinate_of %[[V_35]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_40:[0-9]+]] = fir.load %[[V_39]] : !fir.ref ! CHECK: %[[V_41:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_42:[0-9]+]] = arith.andi %[[V_40]], %[[V_41]] : i8 @@ -62,8 +61,7 @@ program p ! CHECK: %[[V_49:[0-9]+]]:2 = hlfir.declare %[[V_48]] ! CHECK: %[[V_50:[0-9]+]] = fir.load %[[V_19]]#0 : !fir.ref ! CHECK: %[[V_51:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_52:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_53:[0-9]+]] = fir.coordinate_of %[[V_49]]#1, %[[V_52]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_53:[0-9]+]] = fir.coordinate_of %[[V_49]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_54:[0-9]+]] = fir.load %[[V_53]] : !fir.ref ! CHECK: %[[V_55:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_56:[0-9]+]] = arith.andi %[[V_54]], %[[V_55]] : i8 @@ -104,8 +102,7 @@ program p ! CHECK: %[[V_69:[0-9]+]]:2 = hlfir.declare %[[V_68]] ! CHECK: %[[V_70:[0-9]+]] = fir.load %[[V_21]]#0 : !fir.ref ! CHECK: %[[V_71:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_72:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_73:[0-9]+]] = fir.coordinate_of %[[V_69]]#1, %[[V_72]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_73:[0-9]+]] = fir.coordinate_of %[[V_69]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_74:[0-9]+]] = fir.load %[[V_73]] : !fir.ref ! 
CHECK: %[[V_75:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_76:[0-9]+]] = arith.andi %[[V_74]], %[[V_75]] : i8 @@ -124,8 +121,7 @@ program p ! CHECK: %[[V_84:[0-9]+]]:2 = hlfir.declare %[[V_83]] ! CHECK: %[[V_85:[0-9]+]] = fir.load %[[V_21]]#0 : !fir.ref ! CHECK: %[[V_86:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_87:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_88:[0-9]+]] = fir.coordinate_of %[[V_84]]#1, %[[V_87]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_88:[0-9]+]] = fir.coordinate_of %[[V_84]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_89:[0-9]+]] = fir.load %[[V_88]] : !fir.ref ! CHECK: %[[V_90:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_91:[0-9]+]] = arith.andi %[[V_89]], %[[V_90]] : i8 @@ -180,8 +176,7 @@ program p ! CHECK: %[[V_110:[0-9]+]]:2 = hlfir.declare %[[V_109]] ! CHECK: %[[V_111:[0-9]+]] = fir.load %[[V_23]]#0 : !fir.ref ! CHECK: %[[V_112:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_113:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_114:[0-9]+]] = fir.coordinate_of %[[V_110]]#1, %[[V_113]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_114:[0-9]+]] = fir.coordinate_of %[[V_110]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_115:[0-9]+]] = fir.load %[[V_114]] : !fir.ref ! CHECK: %[[V_116:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_117:[0-9]+]] = arith.andi %[[V_115]], %[[V_116]] : i8 @@ -219,8 +214,7 @@ program p ! 
CHECK: %[[V_130:[0-9]+]] = fir.address_of(@_QQro._QMieee_arithmeticTieee_class_type.3) : !fir.ref> ! CHECK: %[[V_131:[0-9]+]]:2 = hlfir.declare %[[V_130]] - ! CHECK: %[[V_132:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_133:[0-9]+]] = fir.coordinate_of %[[V_131]]#1, %[[V_132]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_133:[0-9]+]] = fir.coordinate_of %[[V_131]]#1, _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_134:[0-9]+]] = fir.load %[[V_133]] : !fir.ref ! CHECK: %[[V_135:[0-9]+]] = fir.address_of(@_FortranAIeeeValueTable_4) : !fir.ref> ! CHECK: %[[V_136:[0-9]+]] = fir.coordinate_of %[[V_135]], %[[V_134]] : (!fir.ref>, i8) -> !fir.ref @@ -244,8 +238,7 @@ program p ! CHECK: %[[V_144:[0-9]+]]:2 = hlfir.declare %[[V_143]] ! CHECK: %[[V_145:[0-9]+]] = fir.load %[[V_25]]#0 : !fir.ref ! CHECK: %[[V_146:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_147:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_148:[0-9]+]] = fir.coordinate_of %[[V_144]]#1, %[[V_147]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_148:[0-9]+]] = fir.coordinate_of %[[V_144]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_149:[0-9]+]] = fir.load %[[V_148]] : !fir.ref ! CHECK: %[[V_150:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_151:[0-9]+]] = arith.andi %[[V_149]], %[[V_150]] : i8 diff --git a/flang/test/Lower/Intrinsics/ieee_rounding.f90 b/flang/test/Lower/Intrinsics/ieee_rounding.f90 index 211e8c8178787..a0c73a3ff8bcd 100644 --- a/flang/test/Lower/Intrinsics/ieee_rounding.f90 +++ b/flang/test/Lower/Intrinsics/ieee_rounding.f90 @@ -9,8 +9,7 @@ program r ! 
CHECK: fir.if %true{{[_0-9]*}} { if (ieee_support_rounding(ieee_down)) then - ! CHECK: %[[V_62:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_63:[0-9]+]] = fir.coordinate_of %[[V_57]]#1, %[[V_62]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_63:[0-9]+]] = fir.coordinate_of %[[V_57]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_64:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 ! CHECK: %[[V_65:[0-9]+]] = fir.convert %[[V_64]] : (i32) -> i8 ! CHECK: fir.store %[[V_65]] to %[[V_63]] : !fir.ref @@ -18,8 +17,7 @@ program r ! CHECK: %[[V_66:[0-9]+]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_ieee_round_type.0) : !fir.ref> ! CHECK: %[[V_67:[0-9]+]]:2 = hlfir.declare %[[V_66]] - ! CHECK: %[[V_68:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_69:[0-9]+]] = fir.coordinate_of %[[V_67]]#1, %[[V_68]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_69:[0-9]+]] = fir.coordinate_of %[[V_67]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_70:[0-9]+]] = fir.load %[[V_69]] : !fir.ref ! CHECK: %[[V_71:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_72:[0-9]+]] = arith.andi %[[V_70]], %[[V_71]] : i8 @@ -30,8 +28,7 @@ program r call ieee_set_rounding_mode(ieee_down) print*, 'ok' - ! CHECK: %[[V_85:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_86:[0-9]+]] = fir.coordinate_of %[[V_57]]#1, %[[V_85]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: %[[V_86:[0-9]+]] = fir.coordinate_of %[[V_57]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_87:[0-9]+]] = fir.load %[[V_86]] : !fir.ref ! CHECK: %[[V_88:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_89:[0-9]+]] = arith.andi %[[V_87]], %[[V_88]] : i8 diff --git a/flang/test/Lower/Intrinsics/ieee_unordered.f90 b/flang/test/Lower/Intrinsics/ieee_unordered.f90 index b7e81d53a2d75..18bb2b0009ed9 100644 --- a/flang/test/Lower/Intrinsics/ieee_unordered.f90 +++ b/flang/test/Lower/Intrinsics/ieee_unordered.f90 @@ -12,12 +12,10 @@ x = -17.0 -! CHECK: %[[V_10:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> -! CHECK: %[[V_11:[0-9]+]] = fir.coordinate_of %[[V_1]], %[[V_10]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[V_11:[0-9]+]] = fir.coordinate_of %[[V_1]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c3{{.*}} to %[[V_11]] : !fir.ref -! CHECK: %[[V_12:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> -! CHECK: %[[V_13:[0-9]+]] = fir.coordinate_of %[[V_1]], %[[V_12]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[V_13:[0-9]+]] = fir.coordinate_of %[[V_1]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_14:[0-9]+]] = fir.load %[[V_13]] : !fir.ref ! CHECK: %[[V_15:[0-9]+]] = fir.address_of(@_FortranAIeeeValueTable_16) : !fir.ref> ! CHECK: %[[V_16:[0-9]+]] = fir.coordinate_of %[[V_15]], %[[V_14]] : (!fir.ref>, i8) -> !fir.ref @@ -28,11 +26,9 @@ ! CHECK: fir.store %[[V_20]] to %[[V_3]] : !fir.ref y = ieee_value(y, ieee_negative_inf) -! 
CHECK: %[[V_26:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> -! CHECK: %[[V_27:[0-9]+]] = fir.coordinate_of %[[V_0]], %[[V_26]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[V_27:[0-9]+]] = fir.coordinate_of %[[V_0]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c2{{.*}} to %[[V_27]] : !fir.ref -! CHECK: %[[V_28:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> -! CHECK: %[[V_29:[0-9]+]] = fir.coordinate_of %[[V_0]], %[[V_28]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[V_29:[0-9]+]] = fir.coordinate_of %[[V_0]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_30:[0-9]+]] = fir.load %[[V_29]] : !fir.ref ! CHECK: %[[V_31:[0-9]+]] = fir.address_of(@_FortranAIeeeValueTable_16) : !fir.ref> ! CHECK: %[[V_32:[0-9]+]] = fir.coordinate_of %[[V_31]], %[[V_30]] : (!fir.ref>, i8) -> !fir.ref diff --git a/flang/test/Lower/Intrinsics/storage_size.f90 b/flang/test/Lower/Intrinsics/storage_size.f90 index 3dc135bbf6fbc..d17602b4d4089 100644 --- a/flang/test/Lower/Intrinsics/storage_size.f90 +++ b/flang/test/Lower/Intrinsics/storage_size.f90 @@ -117,8 +117,7 @@ integer function polymorphic_value(t) result(size) ! CHECK-LABEL: func.func @_QMstorage_size_testPpolymorphic_value( ! CHECK-SAME: %[[T:.*]]: !fir.ref>>>}>> {fir.bindc_name = "t"}) -> i32 { ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "size", uniq_name = "_QMstorage_size_testFpolymorphic_valueEsize"} -! CHECK: %[[FIELD_P:.*]] = fir.field_index p, !fir.type<_QMstorage_size_testTp3{p:!fir.class>>>}> -! CHECK: %[[COORD_P:.*]] = fir.coordinate_of %[[T]], %[[FIELD_P]] : (!fir.ref>>>}>>, !fir.field) -> !fir.ref>>>> +! CHECK: %[[COORD_P:.*]] = fir.coordinate_of %[[T]], p : (!fir.ref>>>}>>) -> !fir.ref>>>> ! 
CHECK: %[[LOAD_COORD_P:.*]] = fir.load %[[COORD_P]] : !fir.ref>>>> ! CHECK: %[[C0:.*]] = arith.constant 0 : index ! CHECK: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[LOAD_COORD_P]], %[[C0]] : (!fir.class>>>, index) -> (index, index, index) diff --git a/flang/test/Lower/Intrinsics/transfer.f90 b/flang/test/Lower/Intrinsics/transfer.f90 index b75fe2e826561..2cc7e93f86f51 100644 --- a/flang/test/Lower/Intrinsics/transfer.f90 +++ b/flang/test/Lower/Intrinsics/transfer.f90 @@ -106,8 +106,7 @@ integer function trans_test3(p) ! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_19]] : (!fir.ref>) -> !fir.ref ! CHECK: fir.call @_FortranAAssign(%[[VAL_21]], %[[VAL_22]], %[[VAL_23]], %[[VAL_20]]) {{.*}}: (!fir.ref>, !fir.box, !fir.ref, i32) -> () ! CHECK: fir.freemem %[[VAL_17]] - ! CHECK: %[[VAL_25:.*]] = fir.field_index x, !fir.type<_QFtrans_test3Tobj{x:i32}> - ! CHECK: %[[VAL_26:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_25]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[VAL_26:.*]] = fir.coordinate_of %[[VAL_3]], x : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref ! CHECK: fir.store %[[VAL_27]] to %[[VAL_4]] : !fir.ref ! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_4]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90 index 5d31de10d74f8..0b0d0e7ae3735 100644 --- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90 +++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90 @@ -148,9 +148,9 @@ end subroutine target_allocatable ! CHECK-SAME: map_entries( ! CHECK-SAME: %[[MAPPED_MI0]] -> %[[MAPPED_ARG0:[^,]+]], ! CHECK-SAME: %[[ALLOC_VAR_MAP]] -> %[[MAPPED_ARG1:[^,]+]] -! CHECK-SAME %[[REAL_ARR_DESC_MAP]] -> %[[MAPPED_ARG2:[^,]+]] -! CHECK_SAME %[[CHAR_VAR_DESC_MAP]] -> %[[MAPPED_ARG3:.[^,]+]] : -! 
CHECK-SAME !fir.ref, !fir.ref>>, !fir.ref>>, !fir.ref>) +! CHECK-SAME: %[[REAL_ARR_DESC_MAP]] -> %[[MAPPED_ARG2:[^,]+]] +! CHECK-SAME: %[[CHAR_VAR_DESC_MAP]] -> %[[MAPPED_ARG3:.[^,]+]] : +! CHECK-SAME: !fir.ref, !fir.ref>>, !fir.ref>>, !fir.ref>) ! CHECK-SAME: private( ! CHECK-SAME: @[[ALLOC_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[ALLOC_ARG:[^,]+]] [map_idx=1], ! CHECK-SAME: @[[REAL_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[REAL_ARG:[^,]+]], diff --git a/flang/test/Lower/OpenMP/copyprivate2.f90 b/flang/test/Lower/OpenMP/copyprivate2.f90 index 3412ba2c63c4d..993a81d199f56 100644 --- a/flang/test/Lower/OpenMP/copyprivate2.f90 +++ b/flang/test/Lower/OpenMP/copyprivate2.f90 @@ -43,7 +43,7 @@ !CHECK: omp.single copyprivate( !CHECK-SAME: %[[A]]#0 -> @_copy_box_heap_Uxi32 : !fir.ref>>>, !CHECK-SAME: %[[P]]#0 -> @_copy_box_ptr_i32 : !fir.ref>>) -!CHEK: } +!CHECK: } subroutine test_alloc_ptr() integer, allocatable :: a(:) integer, pointer :: p diff --git a/flang/test/Lower/OpenMP/declare-mapper.f90 b/flang/test/Lower/OpenMP/declare-mapper.f90 index fa7f23c182a68..e12becbc5d9a9 100644 --- a/flang/test/Lower/OpenMP/declare-mapper.f90 +++ b/flang/test/Lower/OpenMP/declare-mapper.f90 @@ -39,8 +39,7 @@ subroutine declare_mapper_1 !CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> index !CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_14]], %[[VAL_6]]#0 : index !CHECK: %[[VAL_16:.*]] = omp.map.bounds lower_bound(%[[VAL_10]] : index) upper_bound(%[[VAL_15]] : index) extent(%[[VAL_6]]#1 : index) stride(%[[VAL_8]] : index) start_idx(%[[VAL_6]]#0 : index) - !CHECK: %[[VAL_17:.*]] = arith.constant 1 : index - !CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_1]]#0, %[[VAL_17]] : (!fir.ref<[[MY_TYPE]]>, index) -> !fir.ref>>> + !CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_1]]#0, values : (!fir.ref<[[MY_TYPE]]>) -> !fir.ref>>> !CHECK: %[[VAL_19:.*]] = fir.box_offset %[[VAL_18]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[VAL_20:.*]] = omp.map.info 
var_ptr(%[[VAL_18]] : !fir.ref>>>, i32) var_ptr_ptr(%[[VAL_19]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[VAL_16]]) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[VAL_21:.*]] = omp.map.info var_ptr(%[[VAL_18]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {name = "var%[[VAL_22:.*]](1:var%[[VAL_23:.*]])"} @@ -132,8 +131,7 @@ subroutine declare_mapper_3 !CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> index !CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_14]], %[[VAL_6]]#0 : index !CHECK: %[[VAL_16:.*]] = omp.map.bounds lower_bound(%[[VAL_10]] : index) upper_bound(%[[VAL_15]] : index) extent(%[[VAL_6]]#1 : index) stride(%[[VAL_8]] : index) start_idx(%[[VAL_6]]#0 : index) - !CHECK: %[[VAL_17:.*]] = arith.constant 1 : index - !CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_1]]#0, %[[VAL_17]] : (!fir.ref<[[MY_TYPE]]>, index) -> !fir.ref>>> + !CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_1]]#0, values : (!fir.ref<[[MY_TYPE]]>) -> !fir.ref>>> !CHECK: %[[VAL_19:.*]] = fir.box_offset %[[VAL_18]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[VAL_20:.*]] = omp.map.info var_ptr(%[[VAL_18]] : !fir.ref>>>, i32) var_ptr_ptr(%[[VAL_19]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[VAL_16]]) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[VAL_21:.*]] = omp.map.info var_ptr(%[[VAL_18]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {name = "var%[[VAL_22:.*]](1:var%[[VAL_23:.*]])"} diff --git a/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 b/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 index 28a2b9b5b967b..768d782848b53 100644 --- a/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 +++ b/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 @@ -3,8 +3,7 @@ !CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.type<[[ONE_LAYER_TY:_QFdtype_alloca_map_op_blockTone_layer{i:f32,scalar:!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}]]> {{.*}} 
!CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {{{.*}}} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true} -!CHECK: %[[MEMBER_INDEX:.*]] = arith.constant 4 : index -!CHECK: %[[MEMBER_COORD:.*]] = fir.coordinate_of %[[DECLARE]]#0, %[[MEMBER_INDEX]] : (!fir.ref>, index) -> !fir.ref>>> +!CHECK: %[[MEMBER_COORD:.*]] = fir.coordinate_of %[[DECLARE]]#0, array_j : (!fir.ref>) -> !fir.ref>>> !CHECK: %[[MEMBER_BASE_ADDR:.*]] = fir.box_offset %[[MEMBER_COORD]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_MEMBER_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, i32) var_ptr_ptr(%[[MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} !CHECK: %[[MAP_MEMBER_DESCRIPTOR:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {{.*}} @@ -34,14 +33,12 @@ subroutine dtype_alloca_map_op_block() !CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {{{.*}}} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true} !CHECK: %[[LOAD_DTYPE:.*]] = fir.load %[[DECLARE]]#0 : !fir.ref>>> -!CHECK: %[[MEMBER_INDEX:.*]] = arith.constant 4 : index -!CHECK: %[[MEMBER_COORD:.*]] = fir.coordinate_of %[[LOAD_DTYPE]], %[[MEMBER_INDEX]] : (!fir.box>>, index) -> !fir.ref>>> +!CHECK: %[[MEMBER_COORD:.*]] = fir.coordinate_of %[[LOAD_DTYPE]], array_j : (!fir.box>>) -> !fir.ref>>> !CHECK: %[[MEMBER_BASE_ADDR:.*]] = fir.box_offset %[[MEMBER_COORD]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_MEMBER_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, i32) var_ptr_ptr(%[[MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) map_clauses(tofrom) 
capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} !CHECK: %[[MAP_MEMBER_DESC:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {{.*}} !CHECK: %[[LOAD_DTYPE:.*]] = fir.load %[[DECLARE]]#0 : !fir.ref>>> -!CHECK: %[[MEMBER_COORD:.*]] = arith.constant 5 : index -!CHECK: %[[REGULAR_MEMBER:.*]] = fir.coordinate_of %[[LOAD_DTYPE]], %[[MEMBER_COORD]] : (!fir.box>>, index) -> !fir.ref +!CHECK: %[[REGULAR_MEMBER:.*]] = fir.coordinate_of %[[LOAD_DTYPE]], k : (!fir.box>>) -> !fir.ref !CHECK: %[[MAP_REGULAR_MEMBER:.*]] = omp.map.info var_ptr(%[[REGULAR_MEMBER]] : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {{.*}} !CHECK: %[[DTYPE_BASE_ADDR:.*]] = fir.box_offset %[[DECLARE]]#1 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_DTYPE_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref>>>, !fir.type<[[REC_TY]]>) var_ptr_ptr(%[[DTYPE_BASE_ADDR]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr>> {{.*}} @@ -73,18 +70,14 @@ subroutine alloca_dtype_op_block_add() !CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {{.*}} : (!fir.ref}>>>>) -> (!fir.ref}>>>>, !fir.ref}>>>>) !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true} !CHECK: %[[LOAD:.*]] = fir.load %[[DECLARE]]#0 : !fir.ref}>>>> -!CHECK: %[[NESTED_DTYPE_INDEX:.*]] = arith.constant 6 : index -!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[LOAD]], %[[NESTED_DTYPE_INDEX]] : (!fir.box}>>>, index) -> !fir.ref,array_k:!fir.box>>,k:i32}]]>> -!CHECK: %[[NESTED_MEMBER_INDEX:.*]] = arith.constant 2 : index -!CHECK: %[[NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], %[[NESTED_MEMBER_INDEX]] : (!fir.ref>, index) -> !fir.ref>>> +!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[LOAD]], nest : (!fir.box}>>>) -> !fir.ref,array_k:!fir.box>>,k:i32}]]>> +!CHECK: 
%[[NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], array_k : (!fir.ref>) -> !fir.ref>>> !CHECK: %[[NESTED_MEMBER_BASE_ADDR:.*]] = fir.box_offset %[[NESTED_MEMBER_COORD]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_NESTED_MEMBER_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[NESTED_MEMBER_COORD]] : !fir.ref>>>, i32) var_ptr_ptr(%[[NESTED_MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} !CHECK: %[[MAP_NESTED_MEMBER_COORD:.*]] = omp.map.info var_ptr(%[[NESTED_MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {{.*}} !CHECK: %[[LOAD:.*]] = fir.load %[[DECLARE]]#0 : !fir.ref}>>>> -!CHECK: %[[NESTED_DTYPE_INDEX:.*]] = arith.constant 6 : index -!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[LOAD]], %[[NESTED_DTYPE_INDEX]] : (!fir.box}>>>, index) -> !fir.ref> -!CHECK: %[[NESTED_MEMBER_INDEX:.*]] = arith.constant 3 : index -!CHECK: %[[REGULAR_NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], %[[NESTED_MEMBER_INDEX]] : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[LOAD]], nest : (!fir.box}>>>) -> !fir.ref> +!CHECK: %[[REGULAR_NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], k : (!fir.ref>) -> !fir.ref !CHECK: %[[MAP_REGULAR_NESTED_MEMBER:.*]] = omp.map.info var_ptr(%[[REGULAR_NESTED_MEMBER_COORD]] : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {{.*}} !CHECK: %[[DTYPE_BASE_ADDR:.*]] = fir.box_offset %[[DECLARE]]#1 base_addr : (!fir.ref}>>>>) -> !fir.llvm_ptr}>>> !CHECK: %[[MAP_DTYPE_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref}>>>>, !fir.type<[[REC_TY]]>}>) var_ptr_ptr(%[[DTYPE_BASE_ADDR]] : !fir.llvm_ptr}>>>) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr}>>> {{.*}} @@ -123,10 +116,8 @@ subroutine alloca_nest_dype_map_op_block_add() !CHECK: %[[ALLOCA]] = fir.alloca 
!fir.type<[[REC_TY:_QFnest_dtype_alloca_map_op_block_addTtop_layer{i:f32,scalar:!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFnest_dtype_alloca_map_op_block_addTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}]]> {{.*}} !CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA:.*]] {{.*}} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true} -!CHECK: %[[NESTED_DTYPE_INDEX:.*]] = arith.constant 6 : index -!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[DECLARE]]#0, %[[NESTED_DTYPE_INDEX]] : (!fir.ref>, index) -> !fir.ref,array_k:!fir.box>>,k:i32}]]>> -!CHECK: %[[NESTED_MEMBER_INDEX:.*]] = arith.constant 2 : index -!CHECK: %[[NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], %[[NESTED_MEMBER_INDEX]] : (!fir.ref>, index) -> !fir.ref>>> +!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[DECLARE]]#0, nest : (!fir.ref>) -> !fir.ref,array_k:!fir.box>>,k:i32}]]>> +!CHECK: %[[NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], array_k : (!fir.ref>) -> !fir.ref>>> !CHECK: %[[NESTED_MEMBER_BASE_ADDR:.*]] = fir.box_offset %[[NESTED_MEMBER_COORD]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_NESTED_MEMBER_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[NESTED_MEMBER_COORD]] : !fir.ref>>>, i32) var_ptr_ptr(%[[NESTED_MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} !CHECK: %[[MAP_NESTED_MEMBER_DESC:.*]] = omp.map.info var_ptr(%[[NESTED_MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {{.*}} diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90 index ffa4a6ff24f24..795f2a440fd0d 100644 --- a/flang/test/Lower/OpenMP/loop-directive.f90 +++ 
b/flang/test/Lower/OpenMP/loop-directive.f90 @@ -75,7 +75,7 @@ subroutine test_order() subroutine test_reduction() integer :: i, dummy = 1 - ! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction + ! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction ! CHECK-SAME: (@[[RED]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]] : !{{.*}}) { ! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} { ! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_reductionEdummy"} @@ -294,3 +294,46 @@ subroutine teams_loop_cannot_be_parallel_for_4 !$omp end parallel END DO end subroutine + +! CHECK-LABEL: func.func @_QPloop_parallel_bind_reduction +subroutine loop_parallel_bind_reduction + implicit none + integer :: x, i + + ! CHECK: omp.wsloop + ! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref) + ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref) { + ! CHECK-NEXT: omp.loop_nest {{.*}} { + ! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"} + ! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"} + ! CHECK: } + ! CHECK: } + !$omp loop bind(parallel) reduction(+: x) + do i = 0, 10 + x = x + i + end do +end subroutine + +! CHECK-LABEL: func.func @_QPloop_teams_loop_reduction +subroutine loop_teams_loop_reduction + implicit none + integer :: x, i + ! CHECK: omp.teams { + ! CHECK: omp.parallel + ! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref) { + ! CHECK: omp.distribute { + ! CHECK: omp.wsloop + ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref) { + ! CHECK-NEXT: omp.loop_nest {{.*}} { + ! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"} + ! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"} + ! CHECK: } + ! CHECK: } + ! CHECK: } + ! CHECK: } + ! 
CHECK: } + !$omp teams loop reduction(+: x) + do i = 0, 10 + x = x + i + end do +end subroutine diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index bf801e69405b9..3e1680a294b6e 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -508,7 +508,7 @@ subroutine omp_target_device_ptr !CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> {{.*}} {name = "a"} !CHECK: omp.target_data map_entries(%[[MAP]]{{.*}}) use_device_ptr({{.*}} -> %[[VAL_1:.*]] : !fir.ref>) !$omp target data map(tofrom: a) use_device_ptr(a) - !CHECK: {{.*}} = fir.coordinate_of %[[VAL_1:.*]], {{.*}} : (!fir.ref>, !fir.field) -> !fir.ref + !CHECK: {{.*}} = fir.coordinate_of %[[VAL_1:.*]], __address : (!fir.ref>) -> !fir.ref a = c_loc(b) !CHECK: omp.terminator !$omp end target data diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 index 85df29e83f75d..f3b5a3da4dc0b 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 @@ -27,7 +27,7 @@ ! CHECK-SAME: alloc { ! CHECK: %[[REF:.*]] = fir.alloca i64 ! CHECK: omp.yield(%[[REF]] : !fir.ref) -! CHECK-LABE: } init { +! CHECK-LABEL: } init { ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref, %[[ALLOC:.*]]: !fir.ref): ! CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 ! CHECK: fir.store %[[VAL_1]] to %[[ALLOC]] : !fir.ref diff --git a/flang/test/Lower/array-elemental-calls-2.f90 b/flang/test/Lower/array-elemental-calls-2.f90 index 2243bfdd0b289..2674b07dece17 100644 --- a/flang/test/Lower/array-elemental-calls-2.f90 +++ b/flang/test/Lower/array-elemental-calls-2.f90 @@ -185,10 +185,8 @@ integer elemental function elem_func_derived(x) ! CHECK: fir.do_loop ! CHECK: %[[VAL_21:.*]] = fir.array_access %{{.}}, %{{.*}} ! CHECK: %[[VAL_22:.*]] = fir.no_reassoc %[[VAL_21]] : !fir.ref> -! 
CHECK: %[[FIELD:.*]] = fir.field_index i, !fir.type<_QMtest_opsFcheck_parentheses_derivedTt{i:i32}> -! CHECK: %[[FROM:.*]] = fir.coordinate_of %[[VAL_22]], %[[FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref -! CHECK: %[[FIELD2:.*]] = fir.field_index i, !fir.type<_QMtest_opsFcheck_parentheses_derivedTt{i:i32}> -! CHECK: %[[TO:.*]] = fir.coordinate_of %[[VAL_0]], %[[FIELD2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[FROM:.*]] = fir.coordinate_of %[[VAL_22]], i : (!fir.ref>) -> !fir.ref +! CHECK: %[[TO:.*]] = fir.coordinate_of %[[VAL_0]], i : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL:.*]] = fir.load %[[FROM]] : !fir.ref ! CHECK: fir.store %[[VAL]] to %[[TO]] : !fir.ref ! CHECK: %{{.*}} = fir.call @_QPelem_func_derived(%[[VAL_0]]) {{.*}}: (!fir.ref>) -> i32 diff --git a/flang/test/Lower/c-interoperability-c-pointer.f90 b/flang/test/Lower/c-interoperability-c-pointer.f90 index 9700440f6650b..c62f48fa9a1be 100644 --- a/flang/test/Lower/c-interoperability-c-pointer.f90 +++ b/flang/test/Lower/c-interoperability-c-pointer.f90 @@ -3,12 +3,10 @@ ! CHECK-LABEL: func.func @_QPtest( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "ptr1"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "ptr2"}) { -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i64) -> !fir.ref -! CHECK: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! 
CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i64) -> !fir.ref ! CHECK: fir.call @c_func(%[[VAL_5]], %[[VAL_9]]) {{.*}}: (!fir.ref, !fir.ref) -> () @@ -35,14 +33,11 @@ subroutine c_func(c_t1, c_t2) bind(c, name="c_func") ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "local", uniq_name = "_QFtest_callee_c_ptrElocal"} ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref -! CHECK: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref -! CHECK: %[[VAL_8:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], %[[VAL_8]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: fir.store %[[VAL_10]] to %[[VAL_9]] : !fir.ref ! CHECK: return @@ -59,15 +54,12 @@ subroutine test_callee_c_ptr(ptr1) bind(c) ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref ! 
CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> {bindc_name = "local", uniq_name = "_QFtest_callee_c_funptrElocal"} ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref -! CHECK: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref -! CHECK: %[[VAL_8:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], %[[VAL_8]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: fir.store %[[VAL_10]] to %[[VAL_9]] : !fir.ref ! CHECK: return diff --git a/flang/test/Lower/c_ptr-constant-init.f90 b/flang/test/Lower/c_ptr-constant-init.f90 index da56670438aa3..b75ed55f1a4f3 100644 --- a/flang/test/Lower/c_ptr-constant-init.f90 +++ b/flang/test/Lower/c_ptr-constant-init.f90 @@ -13,7 +13,6 @@ end subroutine test ! CHECK-LABEL: fir.global internal @_QQro.1x_QM__fortran_builtinsT__builtin_c_ptr.0 constant : !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> { ! 
CHECK: %[[VAL_0:.*]] = fir.undefined !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> ! CHECK: %[[VAL_1:.*]] = fir.undefined !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK: %[[VAL_3:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_4:.*]] = fir.insert_value %[[VAL_1]], %[[VAL_3]], ["__address", !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>] : (!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>, i64) -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK: %[[VAL_5:.*]] = fir.insert_value %[[VAL_0]], %[[VAL_4]], [0 : index] : (!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> @@ -33,7 +32,6 @@ end subroutine test2 ! CHECK-LABEL: fir.global internal @_QQro.1x_QM__fortran_builtinsT__builtin_c_funptr.1 constant : !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>> { ! CHECK: %[[VAL_0:.*]] = fir.undefined !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>> ! CHECK: %[[VAL_1:.*]] = fir.undefined !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! CHECK: %[[VAL_3:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_4:.*]] = fir.insert_value %[[VAL_1]], %[[VAL_3]], ["__address", !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>] : (!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>, i64) -> !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! 
CHECK: %[[VAL_5:.*]] = fir.insert_value %[[VAL_0]], %[[VAL_4]], [0 : index] : (!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>>, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>) -> !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>> diff --git a/flang/test/Lower/call-by-value.f90 b/flang/test/Lower/call-by-value.f90 index 32b9c79e11e68..3b551014b6e32 100644 --- a/flang/test/Lower/call-by-value.f90 +++ b/flang/test/Lower/call-by-value.f90 @@ -105,8 +105,7 @@ subroutine test_char_value(x) bind(c) ! CHECK-LABEL: func.func @_QPtest_cptr_value( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.declare %[[VAL_1]] diff --git a/flang/test/Lower/call-copy-in-out.f90 b/flang/test/Lower/call-copy-in-out.f90 index fd3b5c342a48f..1eb2c3ffc0b0e 100644 --- a/flang/test/Lower/call-copy-in-out.f90 +++ b/flang/test/Lower/call-copy-in-out.f90 @@ -283,8 +283,7 @@ subroutine whole_components() end type ! CHECK: %[[a:.*]] = fir.alloca !fir.type<_QFwhole_componentsTt{i:!fir.array<100xi32>}> type(t) :: a - ! CHECK: %[[field:.*]] = fir.field_index i, !fir.type<_QFwhole_componentsTt{i:!fir.array<100xi32>}> - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[a]], %[[field]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[a]], i : (!fir.ref}>>) -> !fir.ref> ! 
CHECK: fir.call @_QPbar_integer(%[[addr]]) {{.*}}: (!fir.ref>) -> () call bar_integer(a%i) end subroutine @@ -297,8 +296,7 @@ subroutine whole_component_contiguous_pointer() end type ! CHECK: %[[a:.*]] = fir.alloca !fir.type<_QFwhole_component_contiguous_pointerTt{i:!fir.box>>}> type(t) :: a - ! CHECK: %[[field:.*]] = fir.field_index i, !fir.type<_QFwhole_component_contiguous_pointerTt{i:!fir.box>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a]], %[[field]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a]], i : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[box_load:.*]] = fir.load %[[coor]] : !fir.ref>>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[box_load]] : (!fir.box>>) -> !fir.ptr> ! CHECK: %[[cast:.*]] = fir.convert %[[addr]] : (!fir.ptr>) -> !fir.ref> @@ -314,8 +312,7 @@ subroutine whole_component_contiguous_char_pointer() end type ! CHECK: %[[a:.*]] = fir.alloca !fir.type<_QFwhole_component_contiguous_char_pointerTt{i:!fir.box>>>}> type(t) :: a - ! CHECK: %[[field:.*]] = fir.field_index i, !fir.type<_QFwhole_component_contiguous_char_pointerTt{i:!fir.box>>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a]], %[[field]] : (!fir.ref>>>}>>, !fir.field) -> !fir.ref>>>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a]], i : (!fir.ref>>>}>>) -> !fir.ref>>>> ! CHECK: %[[box_load:.*]] = fir.load %[[coor]] : !fir.ref>>>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[box_load]] : (!fir.box>>>) -> !fir.ptr>> ! CHECK: %[[len:.*]] = fir.box_elesize %[[box_load]] : (!fir.box>>>) -> index diff --git a/flang/test/Lower/derived-allocatable-components.f90 b/flang/test/Lower/derived-allocatable-components.f90 index 850a372baf3c1..1debb275d6276 100644 --- a/flang/test/Lower/derived-allocatable-components.f90 +++ b/flang/test/Lower/derived-allocatable-components.f90 @@ -79,8 +79,7 @@ subroutine ref_scalar_real_a(a0_0, a1_0, a0_1, a1_1) type(real_a0) :: a0_0, a0_1(100) type(real_a1) :: a1_0, a1_1(100) - ! 
CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a0{p:!fir.box>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], %[[fld]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[load]] : (!fir.box>) -> !fir.heap ! CHECK: %[[cast:.*]] = fir.convert %[[addr]] : (!fir.heap) -> !fir.ref @@ -88,16 +87,14 @@ subroutine ref_scalar_real_a(a0_0, a1_0, a0_1, a1_1) call takes_real_scalar(a0_0%p) ! CHECK: %[[a0_1_coor:.*]] = fir.coordinate_of %[[arg2]], %{{.*}} : (!fir.ref>}>>>, i64) -> !fir.ref>}>> - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a0{p:!fir.box>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_1_coor]], %[[fld]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_1_coor]], p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[load]] : (!fir.box>) -> !fir.heap ! CHECK: %[[cast:.*]] = fir.convert %[[addr]] : (!fir.heap) -> !fir.ref ! CHECK: fir.call @_QPtakes_real_scalar(%[[cast]]) {{.*}}: (!fir.ref) -> () call takes_real_scalar(a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a1{p:!fir.box>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg1]], %[[fld]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg1]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[box:.*]] = fir.load %[[coor]] : !fir.ref>>> ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] : (!fir.box>>) -> !fir.heap> ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) @@ -108,8 +105,7 @@ subroutine ref_scalar_real_a(a0_0, a1_0, a0_1, a1_1) call takes_real_scalar(a1_0%p(7)) ! 
CHECK: %[[a1_1_coor:.*]] = fir.coordinate_of %[[arg3]], %{{.*}} : (!fir.ref>>}>>>, i64) -> !fir.ref>>}>> - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a1{p:!fir.box>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_1_coor]], %[[fld]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_1_coor]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[box:.*]] = fir.load %[[coor]] : !fir.ref>>> ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] : (!fir.box>>) -> !fir.heap> ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) @@ -122,8 +118,7 @@ subroutine ref_scalar_real_a(a0_0, a1_0, a0_1, a1_1) ! CHECK-LABEL: func @_QMacompPref_array_real_a( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>}>>{{.*}}, %[[VAL_1:.*]]: !fir.ref>>}>>>{{.*}}) { -! CHECK: %[[VAL_2:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a1{p:!fir.box>>}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]] : !fir.ref>>> ! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_4]], %[[VAL_5]] : (!fir.box>>, index) -> (index, index, index) @@ -143,8 +138,7 @@ subroutine ref_scalar_real_a(a0_0, a1_0, a0_1, a1_1) ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_17]], %[[VAL_18]] : i64 ! CHECK: %[[VAL_20:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_19]] : (!fir.ref>>}>>>, i64) -> !fir.ref>>}>> -! CHECK: %[[VAL_21:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a1{p:!fir.box>>}> -! CHECK: %[[VAL_22:.*]] = fir.coordinate_of %[[VAL_20]], %[[VAL_21]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_22:.*]] = fir.coordinate_of %[[VAL_20]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! 
CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref>>> ! CHECK: %[[VAL_24:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_25:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_24]] : (!fir.box>>, index) -> (index, index, index) @@ -175,8 +169,7 @@ subroutine ref_scalar_cst_char_a(a0_0, a1_0, a0_1, a1_1) type(cst_char_a0) :: a0_0, a0_1(100) type(cst_char_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: %[[boxchar:.*]] = fir.emboxchar %[[addr]], %c10{{.*}} @@ -184,8 +177,7 @@ subroutine ref_scalar_cst_char_a(a0_0, a1_0, a0_1, a1_1) call takes_char_scalar(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: %[[boxchar:.*]] = fir.emboxchar %[[addr]], %c10{{.*}} @@ -193,8 +185,7 @@ subroutine ref_scalar_cst_char_a(a0_0, a1_0, a0_1, a1_1) call takes_char_scalar(a0_1(5)%p) - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[base:.*]] = fir.box_addr %[[box]] ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} @@ -207,8 +198,7 @@ subroutine ref_scalar_cst_char_a(a0_0, a1_0, a0_1, a1_1) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! 
CHECK-DAG: %[[base:.*]] = fir.box_addr %[[box]] ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} @@ -227,8 +217,7 @@ subroutine ref_scalar_def_char_a(a0_0, a1_0, a0_1, a1_1) type(def_char_a0) :: a0_0, a0_1(100) type(def_char_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] @@ -237,8 +226,7 @@ subroutine ref_scalar_def_char_a(a0_0, a1_0, a0_1, a1_1) call takes_char_scalar(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] @@ -247,8 +235,7 @@ subroutine ref_scalar_def_char_a(a0_0, a1_0, a0_1, a1_1) call takes_char_scalar(a0_1(5)%p) - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] @@ -267,8 +254,7 @@ subroutine ref_scalar_def_char_a(a0_0, a1_0, a0_1, a1_1) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! 
CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] @@ -293,45 +279,37 @@ subroutine ref_scalar_derived(a0_0, a1_0, a0_1, a1_1) type(derived_a0) :: a0_0, a0_1(100) type(derived_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(a0_0%p%x) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(a0_1(5)%p%x) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 ! CHECK: %[[index:.*]] = arith.subi %c7{{.*}}, %[[lb]] : i64 ! CHECK: %[[elem:.*]] = fir.coordinate_of %[[box]], %[[index]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(a1_0%p(7)%x) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! 
CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 ! CHECK: %[[index:.*]] = arith.subi %c7{{.*}}, %[[lb]] : i64 ! CHECK: %[[elem:.*]] = fir.coordinate_of %[[box]], %[[index]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(a1_1(5)%p(7)%x) @@ -346,25 +324,21 @@ subroutine ref_scalar_derived(a0_0, a1_0, a0_1, a1_1) subroutine pass_real_a(a0_0, a1_0, a0_1, a1_1) type(real_a0) :: a0_0, a0_1(100) type(real_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: fir.call @_QPtakes_real_scalar_pointer(%[[coor]]) call takes_real_scalar_pointer(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.call @_QPtakes_real_scalar_pointer(%[[coor]]) call takes_real_scalar_pointer(a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: fir.call @_QPtakes_real_array_pointer(%[[coor]]) call takes_real_array_pointer(a1_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! 
CHECK: fir.call @_QPtakes_real_array_pointer(%[[coor]]) call takes_real_array_pointer(a1_1(5)%p) end subroutine @@ -378,28 +352,24 @@ subroutine pass_real_a(a0_0, a1_0, a0_1, a1_1) subroutine allocated_p(a0_0, a1_0, a0_1, a1_1) type(real_a0) :: a0_0, a0_1(100) type(def_char_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(allocated(a0_0%p)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(allocated(a0_1(5)%p)) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(allocated(a1_0%p)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(allocated(a1_1(5)%p)) @@ -414,25 +384,21 @@ subroutine allocated_p(a0_0, a1_0, a0_1, a1_1) subroutine allocate_real(a0_0, a1_0, a0_1, a1_1) type(real_a0) :: a0_0, a0_1(100) type(real_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a0_0%p) ! 
CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a1_1(5)%p(100)) end subroutine @@ -442,25 +408,21 @@ subroutine allocate_real(a0_0, a1_0, a0_1, a1_1) subroutine allocate_cst_char(a0_0, a1_0, a0_1, a1_1) type(cst_char_a0) :: a0_0, a0_1(100) type(cst_char_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! 
CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a1_1(5)%p(100)) end subroutine @@ -470,25 +432,21 @@ subroutine allocate_cst_char(a0_0, a1_0, a0_1, a1_1) subroutine allocate_def_char(a0_0, a1_0, a0_1, a1_1) type(def_char_a0) :: a0_0, a0_1(100) type(def_char_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::a1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::a1_1(5)%p(100)) end subroutine @@ -502,25 +460,21 @@ subroutine allocate_def_char(a0_0, a1_0, a0_1, a1_1) subroutine deallocate_real(a0_0, a1_0, a0_1, a1_1) type(real_a0) :: a0_0, a0_1(100) type(real_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] deallocate(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! 
CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] deallocate(a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] deallocate(a1_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] deallocate(a1_1(5)%p) end subroutine @@ -538,17 +492,13 @@ subroutine test_recursive(x) end type type(t) :: x - ! CHECK: %[[fldNext1:.*]] = fir.field_index next - ! CHECK: %[[next1:.*]] = fir.coordinate_of %[[x]], %[[fldNext1]] + ! CHECK: %[[next1:.*]] = fir.coordinate_of %[[x]], next ! CHECK: %[[nextBox1:.*]] = fir.load %[[next1]] - ! CHECK: %[[fldNext2:.*]] = fir.field_index next - ! CHECK: %[[next2:.*]] = fir.coordinate_of %[[nextBox1]], %[[fldNext2]] + ! CHECK: %[[next2:.*]] = fir.coordinate_of %[[nextBox1]], next ! CHECK: %[[nextBox2:.*]] = fir.load %[[next2]] - ! CHECK: %[[fldNext3:.*]] = fir.field_index next - ! CHECK: %[[next3:.*]] = fir.coordinate_of %[[nextBox2]], %[[fldNext3]] + ! CHECK: %[[next3:.*]] = fir.coordinate_of %[[nextBox2]], next ! CHECK: %[[nextBox3:.*]] = fir.load %[[next3]] - ! CHECK: %[[fldi:.*]] = fir.field_index i - ! CHECK: %[[i:.*]] = fir.coordinate_of %[[nextBox3]], %[[fldi]] + ! CHECK: %[[i:.*]] = fir.coordinate_of %[[nextBox3]], i ! 
CHECK: %[[nextBox3:.*]] = fir.load %[[i]] : !fir.ref print *, x%next%next%next%i end subroutine diff --git a/flang/test/Lower/derived-pointer-components.f90 b/flang/test/Lower/derived-pointer-components.f90 index b01cb5f8deb60..a55618dc16a5f 100644 --- a/flang/test/Lower/derived-pointer-components.f90 +++ b/flang/test/Lower/derived-pointer-components.f90 @@ -79,8 +79,7 @@ subroutine ref_scalar_real_p(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(real_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p0{p:!fir.box>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], %[[fld]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[load]] : (!fir.box>) -> !fir.ptr ! CHECK: %[[cast:.*]] = fir.convert %[[addr]] : (!fir.ptr) -> !fir.ref @@ -88,16 +87,14 @@ subroutine ref_scalar_real_p(p0_0, p1_0, p0_1, p1_1) call takes_real_scalar(p0_0%p) ! CHECK: %[[p0_1_coor:.*]] = fir.coordinate_of %[[arg2]], %{{.*}} : (!fir.ref>}>>>, i64) -> !fir.ref>}>> - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p0{p:!fir.box>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_1_coor]], %[[fld]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_1_coor]], p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[load]] : (!fir.box>) -> !fir.ptr ! CHECK: %[[cast:.*]] = fir.convert %[[addr]] : (!fir.ptr) -> !fir.ref ! CHECK: fir.call @_QPtakes_real_scalar(%[[cast]]) {{.*}}: (!fir.ref) -> () call takes_real_scalar(p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg1]], %[[fld]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! 
CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg1]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>>> ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[load]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 @@ -107,8 +104,7 @@ subroutine ref_scalar_real_p(p0_0, p1_0, p0_1, p1_1) call takes_real_scalar(p1_0%p(7)) ! CHECK: %[[p1_1_coor:.*]] = fir.coordinate_of %[[arg3]], %{{.*}} : (!fir.ref>>}>>>, i64) -> !fir.ref>>}>> - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_1_coor]], %[[fld]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_1_coor]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>>> ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[load]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 @@ -120,8 +116,7 @@ subroutine ref_scalar_real_p(p0_0, p1_0, p0_1, p1_1) ! CHECK-LABEL: func @_QMpcompPref_array_real_p( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>}>>{{.*}}, %[[VAL_1:.*]]: !fir.ref>>}>>>{{.*}}) { -! CHECK: %[[VAL_2:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]] : !fir.ref>>> ! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_4]], %[[VAL_5]] : (!fir.box>>, index) -> (index, index, index) @@ -140,8 +135,7 @@ subroutine ref_scalar_real_p(p0_0, p1_0, p0_1, p1_1) ! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_16]], %[[VAL_17]] : i64 ! 
CHECK: %[[VAL_19:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_18]] : (!fir.ref>>}>>>, i64) -> !fir.ref>>}>> -! CHECK: %[[VAL_20:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> -! CHECK: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_19]], %[[VAL_20]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_19]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref>>> ! CHECK: %[[VAL_23:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_24:.*]]:3 = fir.box_dims %[[VAL_22]], %[[VAL_23]] : (!fir.box>>, index) -> (index, index, index) @@ -171,31 +165,27 @@ subroutine ref_array_real_p(p1_0, p1_1) subroutine assign_scalar_real_p(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(real_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: fir.store {{.*}} to %[[addr]] p0_0%p = 1. ! CHECK: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: fir.store {{.*}} to %[[addr]] p0_1(5)%p = 2. - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], {{.*}} ! CHECK: fir.store {{.*}} to %[[addr]] p1_0%p(7) = 3. ! CHECK: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! 
CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], {{.*}} ! CHECK: fir.store {{.*}} to %[[addr]] @@ -208,8 +198,7 @@ subroutine ref_scalar_cst_char_p(p0_0, p1_0, p0_1, p1_1) type(cst_char_p0) :: p0_0, p0_1(100) type(cst_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: %[[boxchar:.*]] = fir.emboxchar %[[addr]], %c10{{.*}} @@ -217,8 +206,7 @@ subroutine ref_scalar_cst_char_p(p0_0, p1_0, p0_1, p1_1) call takes_char_scalar(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: %[[boxchar:.*]] = fir.emboxchar %[[addr]], %c10{{.*}} @@ -226,8 +214,7 @@ subroutine ref_scalar_cst_char_p(p0_0, p1_0, p0_1, p1_1) call takes_char_scalar(p0_1(5)%p) - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 @@ -239,8 +226,7 @@ subroutine ref_scalar_cst_char_p(p0_0, p1_0, p0_1, p1_1) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! 
CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 @@ -258,8 +244,7 @@ subroutine ref_scalar_def_char_p(p0_0, p1_0, p0_1, p1_1) type(def_char_p0) :: p0_0, p0_1(100) type(def_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] @@ -268,8 +253,7 @@ subroutine ref_scalar_def_char_p(p0_0, p1_0, p0_1, p1_1) call takes_char_scalar(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] @@ -278,8 +262,7 @@ subroutine ref_scalar_def_char_p(p0_0, p1_0, p0_1, p1_1) call takes_char_scalar(p0_1(5)%p) - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} @@ -292,8 +275,7 @@ subroutine ref_scalar_def_char_p(p0_0, p1_0, p0_1, p1_1) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! 
CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} @@ -312,45 +294,37 @@ subroutine ref_scalar_derived(p0_0, p1_0, p0_1, p1_1) type(derived_p0) :: p0_0, p0_1(100) type(derived_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(p0_0%p%x) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(p0_1(5)%p%x) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 ! CHECK: %[[index:.*]] = arith.subi %c7{{.*}}, %[[lb]] ! CHECK: %[[elem:.*]] = fir.coordinate_of %[[box]], %[[index]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(p1_0%p(7)%x) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! 
CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 ! CHECK: %[[index:.*]] = arith.subi %c7{{.*}}, %[[lb]] ! CHECK: %[[elem:.*]] = fir.coordinate_of %[[box]], %[[index]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(p1_1(5)%p(7)%x) @@ -365,25 +339,21 @@ subroutine ref_scalar_derived(p0_0, p1_0, p0_1, p1_1) subroutine pass_real_p(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(real_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.call @_QPtakes_real_scalar_pointer(%[[coor]]) call takes_real_scalar_pointer(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.call @_QPtakes_real_scalar_pointer(%[[coor]]) call takes_real_scalar_pointer(p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.call @_QPtakes_real_array_pointer(%[[coor]]) call takes_real_array_pointer(p1_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! 
CHECK: fir.call @_QPtakes_real_array_pointer(%[[coor]]) call takes_real_array_pointer(p1_1(5)%p) end subroutine @@ -397,28 +367,24 @@ subroutine pass_real_p(p0_0, p1_0, p0_1, p1_1) subroutine associated_p(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(def_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(associated(p0_0%p)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(associated(p0_1(5)%p)) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(associated(p1_0%p)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(associated(p1_1(5)%p)) @@ -433,25 +399,21 @@ subroutine associated_p(p0_0, p1_0, p0_1, p1_1) subroutine passoc_real(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(real_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p0_0%p => real_target ! 
CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p0_1(5)%p => real_target - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p1_0%p => real_array_target ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p1_1(5)%p => real_array_target end subroutine @@ -461,25 +423,21 @@ subroutine passoc_real(p0_0, p1_0, p0_1, p1_1) subroutine passoc_char(p0_0, p1_0, p0_1, p1_1) type(cst_char_p0) :: p0_0, p0_1(100) type(def_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p0_0%p => char_target ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p0_1(5)%p => char_target - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p1_0%p => char_array_target ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! 
CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p1_1(5)%p => char_array_target end subroutine @@ -493,25 +451,21 @@ subroutine passoc_char(p0_0, p1_0, p0_1, p1_1) subroutine nullify_test(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(def_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] nullify(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] nullify(p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] nullify(p1_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] nullify(p1_1(5)%p) end subroutine @@ -525,25 +479,21 @@ subroutine nullify_test(p0_0, p1_0, p0_1, p1_1) subroutine allocate_real(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(real_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! 
CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p1_1(5)%p(100)) end subroutine @@ -553,25 +503,21 @@ subroutine allocate_real(p0_0, p1_0, p0_1, p1_1) subroutine allocate_cst_char(p0_0, p1_0, p0_1, p1_1) type(cst_char_p0) :: p0_0, p0_1(100) type(cst_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! 
CHECK: fir.store {{.*}} to %[[coor]] allocate(p1_1(5)%p(100)) end subroutine @@ -581,25 +527,21 @@ subroutine allocate_cst_char(p0_0, p1_0, p0_1, p1_1) subroutine allocate_def_char(p0_0, p1_0, p0_1, p1_1) type(def_char_p0) :: p0_0, p0_1(100) type(def_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::p1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::p1_1(5)%p(100)) end subroutine @@ -617,8 +559,7 @@ subroutine deallocate_real(p0_0, p1_0, p0_1, p1_1) ! CHECK: %[[VAL_0:.*]] = fir.absent !fir.box ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QQclX{{.*}}) : !fir.ref> ! CHECK: %[[LINE_0:.*]] = arith.constant {{.*}} : i32 - ! CHECK: %[[VAL_2:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p0{p:!fir.box>}> - ! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %arg0, %[[VAL_2]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %arg0, p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (!fir.ref>>) -> !fir.ref> ! 
CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.call @_FortranAPointerDeallocate(%[[VAL_4]], %false, %[[VAL_0]], %[[VAL_5]], %[[LINE_0]]) fastmath : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -632,8 +573,7 @@ subroutine deallocate_real(p0_0, p1_0, p0_1, p1_1) ! CHECK: %[[CON_1:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_9:.*]] = arith.subi %[[CON_5]], %[[CON_1]] : i64 ! CHECK: %[[VAL_10:.*]] = fir.coordinate_of %arg2, %[[VAL_9:.*]] : (!fir.ref>}>>>, i64) -> !fir.ref>}>> - ! CHECK: %[[VAL_11:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p0{p:!fir.box>}> - ! CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_10]], %[[VAL_11]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_10]], p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (!fir.ref>>) -> !fir.ref> ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_8]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_15:.*]] = fir.call @_FortranAPointerDeallocate(%[[VAL_13]], %false_0, %[[VAL_7]], %[[VAL_14]], %[[LINE_1]]) fastmath : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -643,8 +583,7 @@ subroutine deallocate_real(p0_0, p1_0, p0_1, p1_1) ! CHECK: %[[VAL_16:.*]] = fir.absent !fir.box ! CHECK: %[[VAL_17:.*]] = fir.address_of(@_QQclX{{.*}}) : !fir.ref> ! CHECK: %[[LINE_2:.*]] = arith.constant {{.*}} : i32 - ! CHECK: %[[VAL_18:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> - ! CHECK: %[[VAL_19:.*]] = fir.coordinate_of %arg1, %[[VAL_18]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[VAL_19:.*]] = fir.coordinate_of %arg1, p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (!fir.ref>>>) -> !fir.ref> ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_17]] : (!fir.ref>) -> !fir.ref ! 
CHECK: %[[VAL_22:.*]] = fir.call @_FortranAPointerDeallocate(%[[VAL_20]], %false_1, %[[VAL_16]], %[[VAL_21]], %[[LINE_2]]) fastmath : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -658,8 +597,7 @@ subroutine deallocate_real(p0_0, p1_0, p0_1, p1_1) ! CHECK: %[[CON_1A:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_25:.*]] = arith.subi %[[CON_5A]], %[[CON_1A]] : i64 ! CHECK: %[[VAL_26:.*]] = fir.coordinate_of %arg3, %[[VAL_25]] : (!fir.ref>>}>>>, i64) -> !fir.ref>>}>> - ! CHECK: %[[VAL_27:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> - ! CHECK: %[[VAL_28:.*]] = fir.coordinate_of %[[VAL_26]], %[[VAL_27]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[VAL_28:.*]] = fir.coordinate_of %[[VAL_26]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (!fir.ref>>>) -> !fir.ref> ! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_24]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_31:.*]] = fir.call @_FortranAPointerDeallocate(%[[VAL_29]], %false_2, %[[VAL_23]], %[[VAL_30]], %[[LINE_3]]) fastmath : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -694,24 +632,18 @@ subroutine very_long(x) type(t5) :: x(:, :, :, :, :) ! CHECK: %[[coor0:.*]] = fir.coordinate_of %[[x]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.}} - ! CHECK-DAG: %[[flda:.*]] = fir.field_index a - ! CHECK-DAG: %[[fldb:.*]] = fir.field_index b - ! CHECK: %[[coor1:.*]] = fir.coordinate_of %[[coor0]], %[[flda]], %[[fldb]] + ! CHECK: %[[coor1:.*]] = fir.coordinate_of %[[coor0]], a, b ! CHECK: %[[b_box:.*]] = fir.load %[[coor1]] - ! CHECK-DAG: %[[fldc:.*]] = fir.field_index c - ! CHECK-DAG: %[[fldd:.*]] = fir.field_index d - ! CHECK: %[[coor2:.*]] = fir.coordinate_of %[[b_box]], %[[fldc]], %[[fldd]] + ! CHECK: %[[coor2:.*]] = fir.coordinate_of %[[b_box]], c, d ! CHECK: %[[index:.*]] = arith.subi %c6{{.*}}, %c1{{.*}} : i64 ! CHECK: %[[coor3:.*]] = fir.coordinate_of %[[coor2]], %[[index]] - ! CHECK: %[[flde:.*]] = fir.field_index e - ! 
CHECK: %[[coor4:.*]] = fir.coordinate_of %[[coor3]], %[[flde]] + ! CHECK: %[[coor4:.*]] = fir.coordinate_of %[[coor3]], e ! CHECK: %[[e_box:.*]] = fir.load %[[coor4]] ! CHECK: %[[edims:.*]]:3 = fir.box_dims %[[e_box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[edims]]#0 : (index) -> i64 ! CHECK: %[[index2:.*]] = arith.subi %c7{{.*}}, %[[lb]] ! CHECK: %[[coor5:.*]] = fir.coordinate_of %[[e_box]], %[[index2]] - ! CHECK: %[[fldf:.*]] = fir.field_index f - ! CHECK: %[[coor6:.*]] = fir.coordinate_of %[[coor5]], %[[fldf:.*]] + ! CHECK: %[[coor6:.*]] = fir.coordinate_of %[[coor5]], f ! CHECK: fir.load %[[coor6]] : !fir.ref print *, x(1,2,3,4,5)%a%b%c%d(6)%e(7)%f end subroutine @@ -729,17 +661,13 @@ subroutine test_recursive(x) end type type(t) :: x - ! CHECK: %[[fldNext1:.*]] = fir.field_index next - ! CHECK: %[[next1:.*]] = fir.coordinate_of %[[x]], %[[fldNext1]] + ! CHECK: %[[next1:.*]] = fir.coordinate_of %[[x]], next ! CHECK: %[[nextBox1:.*]] = fir.load %[[next1]] - ! CHECK: %[[fldNext2:.*]] = fir.field_index next - ! CHECK: %[[next2:.*]] = fir.coordinate_of %[[nextBox1]], %[[fldNext2]] + ! CHECK: %[[next2:.*]] = fir.coordinate_of %[[nextBox1]], next ! CHECK: %[[nextBox2:.*]] = fir.load %[[next2]] - ! CHECK: %[[fldNext3:.*]] = fir.field_index next - ! CHECK: %[[next3:.*]] = fir.coordinate_of %[[nextBox2]], %[[fldNext3]] + ! CHECK: %[[next3:.*]] = fir.coordinate_of %[[nextBox2]], next ! CHECK: %[[nextBox3:.*]] = fir.load %[[next3]] - ! CHECK: %[[fldi:.*]] = fir.field_index i - ! CHECK: %[[i:.*]] = fir.coordinate_of %[[nextBox3]], %[[fldi]] + ! CHECK: %[[i:.*]] = fir.coordinate_of %[[nextBox3]], i ! CHECK: %[[nextBox3:.*]] = fir.load %[[i]] : !fir.ref print *, x%next%next%next%i end subroutine @@ -754,7 +682,6 @@ module pinit use pcomp ! CHECK-LABEL: fir.global {{.*}}@_QMpinitEarp0 ! CHECK-DAG: %[[undef:.*]] = fir.undefined - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p ! CHECK-DAG: %[[target:.*]] = fir.address_of(@_QMpcompEreal_target) ! 
CHECK: %[[box:.*]] = fir.embox %[[target]] : (!fir.ref) -> !fir.box ! CHECK: %[[rebox:.*]] = fir.rebox %[[box]] : (!fir.box) -> !fir.box> @@ -764,7 +691,6 @@ module pinit ! CHECK-LABEL: fir.global @_QMpinitEbrp1 : !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> { ! CHECK: %[[VAL_0:.*]] = fir.undefined !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> -! CHECK: %[[VAL_1:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> ! CHECK: %[[VAL_2:.*]] = fir.address_of(@_QMpcompEreal_array_target) : !fir.ref> ! CHECK: %[[VAL_3:.*]] = arith.constant 100 : index ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : index @@ -792,7 +718,6 @@ module pinit ! CHECK-LABEL: fir.global {{.*}}@_QMpinitEccp0 ! CHECK-DAG: %[[undef:.*]] = fir.undefined - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p ! CHECK-DAG: %[[target:.*]] = fir.address_of(@_QMpcompEchar_target) ! CHECK: %[[box:.*]] = fir.embox %[[target]] : (!fir.ref>) -> !fir.box> ! CHECK: %[[rebox:.*]] = fir.rebox %[[box]] : (!fir.box>) -> !fir.box>> @@ -802,7 +727,6 @@ module pinit ! CHECK-LABEL: fir.global {{.*}}@_QMpinitEdcp1 ! CHECK-DAG: %[[undef:.*]] = fir.undefined - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p ! CHECK-DAG: %[[target:.*]] = fir.address_of(@_QMpcompEchar_array_target) ! CHECK-DAG: %[[shape:.*]] = fir.shape %c100{{.*}} ! CHECK-DAG: %[[box:.*]] = fir.embox %[[target]](%[[shape]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> diff --git a/flang/test/Lower/derived-type-finalization.f90 b/flang/test/Lower/derived-type-finalization.f90 index b38fcd8ba5766..66735a9056544 100644 --- a/flang/test/Lower/derived-type-finalization.f90 +++ b/flang/test/Lower/derived-type-finalization.f90 @@ -132,8 +132,7 @@ subroutine test_end_finalization2(a) ! CHECK: cf.br ^bb3 ! CHECK: ^bb2: ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMderived_type_finalizationTt1{a:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[T]], %[[FIELD_A]] : (!fir.ref>, !fir.field) -> !fir.ref +! 
CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[T]], a : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[C10]] to %[[COORD_A]] : !fir.ref ! CHECK: cf.br ^bb3 ! CHECK: ^bb3: diff --git a/flang/test/Lower/derived-types.f90 b/flang/test/Lower/derived-types.f90 index 901eb8eca8c6e..4d36a7632b070 100644 --- a/flang/test/Lower/derived-types.f90 +++ b/flang/test/Lower/derived-types.f90 @@ -55,8 +55,7 @@ subroutine saved_derived() subroutine scalar_numeric_ref() ! CHECK: %[[alloc:.*]] = fir.alloca !fir.type<_QMdTr{x:f32}> type(r) :: some_r - ! CHECK: %[[field:.*]] = fir.field_index x, !fir.type<_QMdTr{x:f32}> - ! CHECK: fir.coordinate_of %[[alloc]], %[[field]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: fir.coordinate_of %[[alloc]], x : (!fir.ref>) -> !fir.ref call real_bar(some_r%x) end subroutine @@ -64,8 +63,7 @@ subroutine scalar_numeric_ref() subroutine scalar_character_ref() ! CHECK: %[[alloc:.*]] = fir.alloca !fir.type<_QMdTc{ch:!fir.char<1,10>}> type(c) :: some_c - ! CHECK: %[[field:.*]] = fir.field_index ch, !fir.type<_QMdTc{ch:!fir.char<1,10>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[alloc]], %[[field]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[alloc]], ch : (!fir.ref}>>) -> !fir.ref> ! CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index ! CHECK: fir.emboxchar %[[coor]], %c10 : (!fir.ref>, index) -> !fir.boxchar<1> call char_bar(some_c%ch) @@ -78,8 +76,7 @@ subroutine scalar_character_ref() subroutine array_comp_elt_ref() type(r2) :: some_r2 ! CHECK: %[[alloc:.*]] = fir.alloca !fir.type<_QMdTr2{x_array:!fir.array<10x20xf32>}> - ! CHECK: %[[field:.*]] = fir.field_index x_array, !fir.type<_QMdTr2{x_array:!fir.array<10x20xf32>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[alloc]], %[[field]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[alloc]], x_array : (!fir.ref}>>) -> !fir.ref> ! CHECK-DAG: %[[index1:.*]] = arith.subi %c5{{.*}}, %c1{{.*}} : i64 ! 
CHECK-DAG: %[[index2:.*]] = arith.subi %c6{{.*}}, %c1{{.*}} : i64 ! CHECK: fir.coordinate_of %[[coor]], %[[index1]], %[[index2]] : (!fir.ref>, i64, i64) -> !fir.ref @@ -90,7 +87,7 @@ subroutine array_comp_elt_ref() ! CHECK-LABEL: func @_QMdPchar_array_comp_elt_ref( subroutine char_array_comp_elt_ref() type(c2) :: some_c2 - ! CHECK: %[[coor:.*]] = fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %{{.*}}, ch_array : (!fir.ref>}>>) -> !fir.ref>> ! CHECK-DAG: %[[index1:.*]] = arith.subi %c5{{.*}}, %c1{{.*}} : i64 ! CHECK-DAG: %[[index2:.*]] = arith.subi %c6{{.*}}, %c1{{.*}} : i64 ! CHECK: fir.coordinate_of %[[coor]], %[[index1]], %[[index2]] : (!fir.ref>>, i64, i64) -> !fir.ref> @@ -104,8 +101,7 @@ subroutine array_elt_comp_ref() ! CHECK: %[[alloca:.*]] = fir.alloca !fir.array<100x!fir.type<_QMdTr{x:f32}>> ! CHECK: %[[index:.*]] = arith.subi %c5{{.*}}, %c1{{.*}} : i64 ! CHECK: %[[elt:.*]] = fir.coordinate_of %[[alloca]], %[[index]] : (!fir.ref>>, i64) -> !fir.ref> - ! CHECK: %[[field:.*]] = fir.field_index x, !fir.type<_QMdTr{x:f32}> - ! CHECK: fir.coordinate_of %[[elt]], %[[field]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: fir.coordinate_of %[[elt]], x : (!fir.ref>) -> !fir.ref call real_bar(some_r_array(5)%x) end subroutine @@ -113,7 +109,7 @@ subroutine array_elt_comp_ref() subroutine char_array_elt_comp_ref() type(c) :: some_c_array(100) ! CHECK: fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.ref}>>>, i64) -> !fir.ref}>> - ! CHECK: fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: fir.coordinate_of %{{.*}}, ch : (!fir.ref}>>) -> !fir.ref> ! CHECK: fir.emboxchar %{{.*}}, %c10{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> call char_bar(some_c_array(5)%ch) end subroutine @@ -130,8 +126,7 @@ subroutine char_array_elt_comp_ref() ! CHECK-SAME: %[[arg0:.*]]: !fir.ref> real function scalar_numeric_load(some_r) type(r) :: some_r - ! 
CHECK: %[[field:.*]] = fir.field_index x, !fir.type<_QMdTr{x:f32}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], %[[field]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], x : (!fir.ref>) -> !fir.ref ! CHECK: fir.load %[[coor]] scalar_numeric_load = some_r%x end function diff --git a/flang/test/Lower/equivalence-1.f90 b/flang/test/Lower/equivalence-1.f90 index aec5c0f54c190..133accd38c2f7 100644 --- a/flang/test/Lower/equivalence-1.f90 +++ b/flang/test/Lower/equivalence-1.f90 @@ -45,8 +45,7 @@ SUBROUTINE s3 ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[group]], %c0 : (!fir.ref>, index) -> !fir.ref ! CHECK: %[[rloc:.*]] = fir.convert %[[coor]] : (!fir.ref) -> !fir.ptr> ! CHECK: %[[xloc:.*]] = fir.convert %[[coor]] : (!fir.ref) -> !fir.ptr}>> - ! CHECK: %[[fidx:.*]] = fir.field_index r, !fir.type<_QFs3Tt{r:!fir.array<10xf32>}> - ! CHECK: %[[xrloc:.*]] = fir.coordinate_of %[[xloc]], %[[fidx]] : + ! CHECK: %[[xrloc:.*]] = fir.coordinate_of %[[xloc]], r ! CHECK: %[[v1loc:.*]] = fir.coordinate_of %[[xrloc]], %c8_i64 : (!fir.ref>, i64) -> !fir.ref ! CHECK: fir.store %{{.*}} to %[[v1loc]] : !fir.ref x%r(9) = 9.0 diff --git a/flang/test/Lower/forall/array-pointer.f90 b/flang/test/Lower/forall/array-pointer.f90 index 1e8f7a6a55002..fd3efed736c39 100644 --- a/flang/test/Lower/forall/array-pointer.f90 +++ b/flang/test/Lower/forall/array-pointer.f90 @@ -407,10 +407,9 @@ end subroutine s3 ! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i64) -> index ! CHECK: %[[VAL_24:.*]] = arith.subi %[[VAL_23]], %[[VAL_20]] : index ! CHECK: %[[VAL_25:.*]] = fir.field_index ip, !fir.type<_QMarray_of_pointer_testTtu{ip:!fir.box>>}> -! CHECK: %[[VAL_26:.*]] = fir.field_index v, !fir.type<_QMarray_of_pointer_testTu{v:i32}> ! CHECK: %[[VAL_27:.*]] = fir.array_access %[[VAL_12]], %[[VAL_24]], %[[VAL_25]] : (!fir.array>>}>>, index, !fir.field) -> !fir.ref>>> ! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref>>> -! 
CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_28]], %[[VAL_26]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_28]], v : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[VAL_19]] to %[[VAL_29]] : !fir.ref ! CHECK: %[[VAL_30:.*]] = fir.array_amend %[[VAL_12]], %[[VAL_27]] : (!fir.array>>}>>, !fir.ref>>>) -> !fir.array>>}>> ! CHECK: fir.result %[[VAL_30]] : !fir.array>>}>> @@ -456,10 +455,9 @@ end subroutine s3_1 ! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i64) -> index ! CHECK: %[[VAL_24:.*]] = arith.subi %[[VAL_23]], %[[VAL_20]] : index ! CHECK: %[[VAL_25:.*]] = fir.field_index ip, !fir.type<_QMarray_of_pointer_testTtu{ip:!fir.box>>}> -! CHECK: %[[VAL_26:.*]] = fir.field_index v, !fir.type<_QMarray_of_pointer_testTu{v:i32}> ! CHECK: %[[VAL_27:.*]] = fir.array_access %[[VAL_12]], %[[VAL_24]], %[[VAL_25]] : (!fir.array>>}>>, index, !fir.field) -> !fir.ref>>> ! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref>>> -! CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_28]], %[[VAL_26]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_28]], v : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[VAL_19]] to %[[VAL_29]] : !fir.ref ! CHECK: %[[VAL_30:.*]] = fir.array_amend %[[VAL_12]], %[[VAL_27]] : (!fir.array>>}>>, !fir.ref>>>) -> !fir.array>>}>> ! CHECK: fir.result %[[VAL_30]] : !fir.array>>}>> @@ -605,14 +603,12 @@ end subroutine s6 ! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i32) -> i64 ! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i64) -> index ! CHECK: %[[VAL_40:.*]] = arith.subi %[[VAL_39]], %[[VAL_31]] : index -! CHECK: %[[VAL_41:.*]] = fir.field_index ip, !fir.type<_QMarray_of_pointer_testTtu{ip:!fir.box>>}> -! CHECK: %[[VAL_42:.*]] = fir.field_index v, !fir.type<_QMarray_of_pointer_testTu{v:i32}> ! CHECK: %[[VAL_43:.*]] = fir.array_access %[[VAL_23]], %[[VAL_35]], %[[VAL_36]] : (!fir.array>>}>>>>}>>, index, !fir.field) -> !fir.ref>>}>>>>> ! 
CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_43]] : !fir.ref>>}>>>>> ! CHECK: %[[VAL_45:.*]] = fir.coordinate_of %[[VAL_44]], %[[VAL_40]] : (!fir.box>>}>>>>, index) -> !fir.ref>>}>> -! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_45]], %[[VAL_41]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_45]], ip : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref>>> -! CHECK: %[[VAL_48:.*]] = fir.coordinate_of %[[VAL_47]], %[[VAL_42]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_48:.*]] = fir.coordinate_of %[[VAL_47]], v : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[VAL_30]] to %[[VAL_48]] : !fir.ref ! CHECK: %[[VAL_49:.*]] = fir.array_amend %[[VAL_23]], %[[VAL_43]] : (!fir.array>>}>>>>}>>, !fir.ref>>}>>>>>) -> !fir.array>>}>>>>}>> ! CHECK: fir.result %[[VAL_49]] : !fir.array>>}>>>>}>> @@ -658,8 +654,7 @@ end subroutine s7 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64 ! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.box>}>>>, i64) -> !fir.ref>}>> -! CHECK: %[[VAL_25:.*]] = fir.field_index ip, !fir.type<_QMarray_of_pointer_testTt{ip:!fir.box>}> -! CHECK: %[[VAL_26:.*]] = fir.coordinate_of %[[VAL_24]], %[[VAL_25]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> +! CHECK: %[[VAL_26:.*]] = fir.coordinate_of %[[VAL_24]], ip : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref>> ! CHECK: %[[VAL_28:.*]] = fir.box_addr %[[VAL_27]] : (!fir.box>) -> !fir.ptr ! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ptr diff --git a/flang/test/Lower/forall/forall-allocatable-2.f90 b/flang/test/Lower/forall/forall-allocatable-2.f90 index f7c46acf87275..67a0018f9a22b 100644 --- a/flang/test/Lower/forall/forall-allocatable-2.f90 +++ b/flang/test/Lower/forall/forall-allocatable-2.f90 @@ -29,8 +29,7 @@ end subroutine forall_with_allocatable2 ! 
CHECK: %[[VAL_11:.*]] = arith.constant 15 : i32 ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : index -! CHECK: %[[VAL_14:.*]] = fir.field_index arr, !fir.type<_QFforall_with_allocatable2Tt{i:i32,arr:!fir.box>>}> -! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_14]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_2]], arr : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref>>> ! CHECK: %[[VAL_17:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_16]], %[[VAL_17]] : (!fir.box>>, index) -> (index, index, index) diff --git a/flang/test/Lower/forall/forall-where.f90 b/flang/test/Lower/forall/forall-where.f90 index b1dd72fdfb4f2..54ff2bd4c3f16 100644 --- a/flang/test/Lower/forall/forall-where.f90 +++ b/flang/test/Lower/forall/forall-where.f90 @@ -137,8 +137,7 @@ end subroutine test_nested_forall_where ! CHECK: %[[VAL_109:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_110:.*]] = arith.subi %[[VAL_108]], %[[VAL_109]] : i64 ! CHECK: %[[VAL_111:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_106]], %[[VAL_110]] : (!fir.box}>>>, i64, i64) -> !fir.ref}>> -! CHECK: %[[VAL_112:.*]] = fir.field_index data, !fir.type<_QFtest_nested_forall_whereTt{data:!fir.array<100xf32>}> -! CHECK: %[[VAL_113:.*]] = fir.coordinate_of %[[VAL_111]], %[[VAL_112]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_113:.*]] = fir.coordinate_of %[[VAL_111]], data : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[VAL_114:.*]] = arith.constant 100 : index ! CHECK: %[[VAL_115:.*]] = fir.shape %[[VAL_114]] : (index) -> !fir.shape<1> ! 
CHECK: %[[VAL_116:.*]] = fir.array_load %[[VAL_113]](%[[VAL_115]]) : (!fir.ref>, !fir.shape<1>) -> !fir.array<100xf32> diff --git a/flang/test/Lower/identical-block-merge-disable.f90 b/flang/test/Lower/identical-block-merge-disable.f90 index de74ba529d87f..cc3120a3b6f67 100644 --- a/flang/test/Lower/identical-block-merge-disable.f90 +++ b/flang/test/Lower/identical-block-merge-disable.f90 @@ -55,8 +55,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_11]]#0 : (index) -> i64 ! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64 ! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_10]], %[[VAL_15]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_17:.*]] = fir.field_index panels_l, !fir.type<_QMdmumps_sol_lrTblr_struc_t{panels_l:!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}> -! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_16]], %[[VAL_17]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_16]], panels_l : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref>>> ! CHECK: %[[VAL_20:.*]] = fir.box_addr %[[VAL_19]] : (!fir.box>>) -> !fir.ptr> ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (!fir.ptr>) -> i64 @@ -70,7 +69,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_24]]#0 : (index) -> i64 ! CHECK: %[[VAL_28:.*]] = arith.subi %[[VAL_26]], %[[VAL_27]] : i64 ! CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_23]], %[[VAL_28]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! 
CHECK: %[[VAL_30:.*]] = fir.coordinate_of %[[VAL_29]], %[[VAL_17]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_30:.*]] = fir.coordinate_of %[[VAL_29]], panels_l : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_30]] : !fir.ref>>> ! CHECK: %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_31]], %[[VAL_3]] : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]]#1 : (index) -> i32 @@ -82,8 +81,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_35]]#0 : (index) -> i64 ! CHECK: %[[VAL_39:.*]] = arith.subi %[[VAL_37]], %[[VAL_38]] : i64 ! CHECK: %[[VAL_40:.*]] = fir.coordinate_of %[[VAL_34]], %[[VAL_39]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_41:.*]] = fir.field_index begs_blr_static, !fir.type<_QMdmumps_sol_lrTblr_struc_t{panels_l:!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}> -! CHECK: %[[VAL_42:.*]] = fir.coordinate_of %[[VAL_40]], %[[VAL_41]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_42:.*]] = fir.coordinate_of %[[VAL_40]], begs_blr_static : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref>>> ! CHECK: %[[VAL_44:.*]]:3 = fir.box_dims %[[VAL_43]], %[[VAL_3]] : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_44]]#1 : (index) -> i32 @@ -98,8 +96,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_48]]#0 : (index) -> i64 ! CHECK: %[[VAL_52:.*]] = arith.subi %[[VAL_50]], %[[VAL_51]] : i64 ! 
CHECK: %[[VAL_53:.*]] = fir.coordinate_of %[[VAL_47]], %[[VAL_52]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_54:.*]] = fir.field_index panels_u, !fir.type<_QMdmumps_sol_lrTblr_struc_t{panels_l:!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}> -! CHECK: %[[VAL_55:.*]] = fir.coordinate_of %[[VAL_53]], %[[VAL_54]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_55:.*]] = fir.coordinate_of %[[VAL_53]], panels_u : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_56:.*]] = fir.load %[[VAL_55]] : !fir.ref>>> ! CHECK: %[[VAL_57:.*]] = fir.box_addr %[[VAL_56]] : (!fir.box>>) -> !fir.ptr> ! CHECK: %[[VAL_58:.*]] = fir.convert %[[VAL_57]] : (!fir.ptr>) -> i64 @@ -113,7 +110,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_64:.*]] = fir.convert %[[VAL_61]]#0 : (index) -> i64 ! CHECK: %[[VAL_65:.*]] = arith.subi %[[VAL_63]], %[[VAL_64]] : i64 ! CHECK: %[[VAL_66:.*]] = fir.coordinate_of %[[VAL_60]], %[[VAL_65]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_67:.*]] = fir.coordinate_of %[[VAL_66]], %[[VAL_54]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_67:.*]] = fir.coordinate_of %[[VAL_66]], panels_u : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_68:.*]] = fir.load %[[VAL_67]] : !fir.ref>>> ! CHECK: %[[VAL_69:.*]]:3 = fir.box_dims %[[VAL_68]], %[[VAL_3]] : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[VAL_70:.*]] = fir.convert %[[VAL_69]]#1 : (index) -> i32 @@ -125,8 +122,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_75:.*]] = fir.convert %[[VAL_72]]#0 : (index) -> i64 ! 
CHECK: %[[VAL_76:.*]] = arith.subi %[[VAL_74]], %[[VAL_75]] : i64 ! CHECK: %[[VAL_77:.*]] = fir.coordinate_of %[[VAL_71]], %[[VAL_76]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_78:.*]] = fir.field_index begs_blr_static, !fir.type<_QMdmumps_sol_lrTblr_struc_t{panels_l:!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}> -! CHECK: %[[VAL_79:.*]] = fir.coordinate_of %[[VAL_77]], %[[VAL_78]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_79:.*]] = fir.coordinate_of %[[VAL_77]], begs_blr_static : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_80:.*]] = fir.load %[[VAL_79]] : !fir.ref>>> ! CHECK: %[[VAL_81:.*]]:3 = fir.box_dims %[[VAL_80]], %[[VAL_3]] : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[VAL_82:.*]] = fir.convert %[[VAL_81]]#1 : (index) -> i32 diff --git a/flang/test/Lower/io-derived-type.f90 b/flang/test/Lower/io-derived-type.f90 index 8ac995739afd7..ecbbc22d24b1e 100644 --- a/flang/test/Lower/io-derived-type.f90 +++ b/flang/test/Lower/io-derived-type.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s +! RUN: bbc -emit-hlfir -o - %s | FileCheck %s module m type t @@ -22,7 +22,7 @@ subroutine wft(dtv, unit, iotype, v_list, iostat, iomsg) ! CHECK-LABEL: @_QMmPwftd subroutine wftd(dtv, unit, iotype, v_list, iostat, iomsg) - type(t), intent(in) :: dtv + class(t), intent(in) :: dtv integer, intent(in) :: unit character(*), intent(in) :: iotype integer, intent(in) :: v_list(:) @@ -35,9 +35,6 @@ subroutine wftd(dtv, unit, iotype, v_list, iostat, iomsg) ! CHECK-LABEL: @_QMmPtest1 subroutine test1 import, all - ! CHECK: %[[V_14:[0-9]+]] = fir.field_index n, !fir.type<_QMmTt{n:i32}> - ! CHECK: %[[V_15:[0-9]+]] = fir.coordinate_of %{{.*}}, %[[V_14]] : (!fir.ref>, !fir.field) -> !fir.ref - ! 
CHECK: fir.store %c1{{.*}} to %[[V_15]] : !fir.ref ! CHECK: %[[V_16:[0-9]+]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> ! CHECK: %[[V_17:[0-9]+]] = fir.convert %[[V_16]] : (!fir.box>) -> !fir.box ! CHECK: %[[V_18:[0-9]+]] = fir.address_of(@_QQMmFtest1.nonTbpDefinedIoTable) : !fir.ref, !fir.ref, i32, i1>>>, i1>> @@ -46,9 +43,6 @@ subroutine test1 print *, 'test1 outer, should call wft: ', t(1) block import, only: t - ! CHECK: %[[V_35:[0-9]+]] = fir.field_index n, !fir.type<_QMmTt{n:i32}> - ! CHECK: %[[V_36:[0-9]+]] = fir.coordinate_of %{{.*}}, %[[V_35]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: fir.store %c2{{.*}} to %[[V_36]] : !fir.ref ! CHECK: %[[V_37:[0-9]+]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> ! CHECK: %[[V_38:[0-9]+]] = fir.convert %[[V_37]] : (!fir.box>) -> !fir.box ! CHECK: %[[V_39:[0-9]+]] = fir.address_of(@_QQdefault.nonTbpDefinedIoTable) : !fir.ref, !fir.ref, i32, i1>>>, i1>> @@ -60,9 +54,6 @@ subroutine test1 ! CHECK-LABEL: @_QMmPtest2 subroutine test2 - ! CHECK: %[[V_13:[0-9]+]] = fir.field_index n, !fir.type<_QMmTt{n:i32}> - ! CHECK: %[[V_14:[0-9]+]] = fir.coordinate_of %{{.*}}, %[[V_13]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: fir.store %c3{{.*}} to %[[V_14]] : !fir.ref ! CHECK: %[[V_15:[0-9]+]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> ! CHECK: %[[V_16:[0-9]+]] = fir.convert %[[V_15]] : (!fir.box>) -> !fir.box ! CHECK: %[[V_17:[0-9]+]] = fir.address_of(@_QQdefault.nonTbpDefinedIoTable) : !fir.ref, !fir.ref, i32, i1>>>, i1>> @@ -81,7 +72,7 @@ subroutine test3(p, x) procedure p end interface - ! CHECK: %[[V_3:[0-9]+]] = fir.embox %arg1 : (!fir.ref>) -> !fir.box> + ! CHECK: %[[V_3:[0-9]+]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> ! CHECK: %[[V_4:[0-9]+]] = fir.convert %[[V_3]] : (!fir.box>) -> !fir.box ! CHECK: %[[V_5:[0-9]+]] = fir.alloca !fir.array<1xtuple, !fir.ref, i32, i1>> ! CHECK: %[[V_6:[0-9]+]] = fir.undefined !fir.array<1xtuple, !fir.ref, i32, i1>> @@ -91,13 +82,13 @@ subroutine test3(p, x) ! 
CHECK: %[[V_10:[0-9]+]] = fir.box_addr %arg0 : (!fir.boxproc<() -> ()>) -> !fir.ref ! CHECK: %[[V_11:[0-9]+]] = fir.insert_value %[[V_9]], %[[V_10]], [0 : index, 1 : index] : (!fir.array<1xtuple, !fir.ref, i32, i1>>, !fir.ref) -> !fir.array<1xtuple, !fir.ref, i32, i1>> ! CHECK: %[[V_12:[0-9]+]] = fir.insert_value %[[V_11]], %c2{{.*}}, [0 : index, 2 : index] : (!fir.array<1xtuple, !fir.ref, i32, i1>>, i32) -> !fir.array<1xtuple, !fir.ref, i32, i1>> - ! CHECK: %[[V_13:[0-9]+]] = fir.insert_value %[[V_12]], %false, [0 : index, 3 : index] : (!fir.array<1xtuple, !fir.ref, i32, i1>>, i1) -> !fir.array<1xtuple, !fir.ref, i32, i1>> + ! CHECK: %[[V_13:[0-9]+]] = fir.insert_value %[[V_12]], %true, [0 : index, 3 : index] : (!fir.array<1xtuple, !fir.ref, i32, i1>>, i1) -> !fir.array<1xtuple, !fir.ref, i32, i1>> ! CHECK: fir.store %[[V_13]] to %[[V_5]] : !fir.ref, !fir.ref, i32, i1>>> ! CHECK: %[[V_14:[0-9]+]] = fir.alloca tuple, !fir.ref, i32, i1>>>, i1> ! CHECK: %[[V_15:[0-9]+]] = fir.undefined tuple, !fir.ref, i32, i1>>>, i1> ! CHECK: %[[V_16:[0-9]+]] = fir.insert_value %[[V_15]], %c1{{.*}}, [0 : index] : (tuple, !fir.ref, i32, i1>>>, i1>, i64) -> tuple, !fir.ref, i32, i1>>>, i1> ! CHECK: %[[V_17:[0-9]+]] = fir.insert_value %[[V_16]], %[[V_5]], [1 : index] : (tuple, !fir.ref, i32, i1>>>, i1>, !fir.ref, !fir.ref, i32, i1>>>) -> tuple, !fir.ref, i32, i1>>>, i1> - ! CHECK: %[[V_18:[0-9]+]] = fir.insert_value %[[V_17]], %true, [2 : index] : (tuple, !fir.ref, i32, i1>>>, i1>, i1) -> tuple, !fir.ref, i32, i1>>>, i1> + ! CHECK: %[[V_18:[0-9]+]] = fir.insert_value %[[V_17]], %true_0, [2 : index] : (tuple, !fir.ref, i32, i1>>>, i1>, i1) -> tuple, !fir.ref, i32, i1>>>, i1> ! CHECK: fir.store %[[V_18]] to %[[V_14]] : !fir.ref, !fir.ref, i32, i1>>>, i1>> ! CHECK: %[[V_19:[0-9]+]] = fir.convert %[[V_14]] : (!fir.ref, !fir.ref, i32, i1>>>, i1>>) -> !fir.ref ! 
CHECK: %[[V_20:[0-9]+]] = fir.call @_FortranAioOutputDerivedType(%{{.*}}, %[[V_4]], %[[V_19]]) fastmath : (!fir.ref, !fir.box, !fir.ref) -> i1 @@ -118,9 +109,6 @@ program p ! CHECK: fir.call @_QMmPtest3 call test3(wftd, t(17)) - ! CHECK: %[[V_95:[0-9]+]] = fir.field_index n, !fir.type<_QMmTt{n:i32}> - ! CHECK: %[[V_96:[0-9]+]] = fir.coordinate_of %{{.*}}, %[[V_95]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: fir.store %c4{{.*}} to %[[V_96]] : !fir.ref ! CHECK: %[[V_97:[0-9]+]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> ! CHECK: %[[V_98:[0-9]+]] = fir.convert %[[V_97]] : (!fir.box>) -> !fir.box ! CHECK: %[[V_99:[0-9]+]] = fir.address_of(@_QQF.nonTbpDefinedIoTable) : !fir.ref, !fir.ref, i32, i1>>>, i1>> diff --git a/flang/test/Lower/parent-component.f90 b/flang/test/Lower/parent-component.f90 index 3cb23f277c9a3..7de20ea044905 100644 --- a/flang/test/Lower/parent-component.f90 +++ b/flang/test/Lower/parent-component.f90 @@ -1,7 +1,7 @@ ! Test different ways of passing the parent component of an extended ! derived-type to a subroutine or the runtime. -! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false %s -o - | FileCheck %s +! RUN: bbc -emit-hlfir %s -o - | FileCheck %s program parent_comp type p @@ -43,35 +43,21 @@ subroutine init_with_slice() print*,y(:)%p end subroutine ! CHECK-LABEL: func.func private @_QFPinit_with_slice() - ! CHECK: %[[Y:.*]] = fir.address_of(@_QFFinit_with_sliceEy) : !fir.ref>> - ! CHECK: %[[C2:.*]] = arith.constant 2 : index - ! CHECK: %[[C1:.*]] = arith.constant 1 : index - ! CHECK: %[[C1_I64:.*]] = arith.constant 1 : i64 - ! CHECK: %[[STRIDE:.*]] = fir.convert %[[C1_I64]] : (i64) -> index - ! CHECK: %[[ADD:.*]] = arith.addi %[[C1]], %[[C2]] : index - ! CHECK: %[[UB:.*]] = arith.subi %[[ADD]], %[[C1]] : index - ! CHECK: %[[SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> - ! CHECK: %[[SLICE:.*]] = fir.slice %[[C1]], %[[UB]], %[[STRIDE]] : (index, index, index) -> !fir.slice<1> - ! 
CHECK: %[[BOX:.*]] = fir.embox %[[Y]](%[[SHAPE]]) [%[[SLICE]]] : (!fir.ref>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %[[IS_CONTIGOUS:.*]] = fir.call @_FortranAIsContiguous(%[[BOX_NONE]]) {{.*}}: (!fir.box) -> i1 - ! CHECK: %[[TEMP:.*]] = fir.if %[[IS_CONTIGOUS]] -> (!fir.heap>>) { - ! CHECK: } else { - ! CHECK: fir.call @_FortranAAssign - ! CHECK: %[[TEMP_CAST:.*]] = fir.convert %[[TEMP]] : (!fir.heap>>) -> !fir.ref>> - ! CHECK: fir.call @_QFPprint_p(%[[TEMP_CAST]]) {{.*}}: (!fir.ref>>) -> () - - ! CHECK-LABEL: %{{.*}} = fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref - ! CHECK: %[[C1:.*]] = arith.constant 1 : index - ! CHECK: %[[C1_I64:.*]] = arith.constant 1 : i64 - ! CHECK: %[[STRIDE:.*]] = fir.convert %[[C1_I64]] : (i64) -> index - ! CHECK: %[[ADD:.*]] = arith.addi %[[C1]], %[[C2]] : index - ! CHECK: %[[UB:.*]] = arith.subi %[[ADD]], %[[C1]] : index - ! CHECK: %[[SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> - ! CHECK: %[[SLICE:.*]] = fir.slice %{{.*}}, %{{.*}}, %{{.*}} : (index, index, index) -> !fir.slice<1> - ! CHECK: %[[BOX:.*]] = fir.embox %[[Y]](%[[SHAPE]]) [%[[SLICE]]] : (!fir.ref>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %{{.*}} = fir.call @_FortranAioOutputDescriptor(%{{.*}}, %[[BOX_NONE]]) {{.*}}: (!fir.ref, !fir.box) -> i1 + ! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box>>> + ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFFinit_with_sliceEy) : !fir.ref,b:i32}>>> + ! CHECK: %[[VAL_2:.*]] = arith.constant 2 : index + ! CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1> + ! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_3]]) {uniq_name = "_QFFinit_with_sliceEy"} : (!fir.ref,b:i32}>>>, !fir.shape<1>) -> (!fir.ref,b:i32}>>>, !fir.ref,b:i32}>>>) + ! 
CHECK: %[[VAL_5:.*]] = arith.constant 1 : index + ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : index + ! CHECK: %[[VAL_7:.*]] = arith.constant 2 : index + ! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> + ! CHECK: %[[VAL_9:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_5]]:%[[VAL_2]]:%[[VAL_6]]) shape %[[VAL_8]] : (!fir.ref,b:i32}>>>, index, index, index, !fir.shape<1>) -> !fir.ref,b:i32}>>> + ! CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_9]]{"p"} shape %[[VAL_8]] : (!fir.ref,b:i32}>>>, !fir.shape<1>) -> !fir.box>> + ! CHECK: %[[VAL_11:.*]]:2 = hlfir.copy_in %[[VAL_10]] to %[[VAL_0]] : (!fir.box>>, !fir.ref>>>>) -> (!fir.box>>, i1) + ! CHECK: %[[VAL_12:.*]] = fir.box_addr %[[VAL_11]]#0 : (!fir.box>>) -> !fir.ref>> + ! CHECK: fir.call @_QFPprint_p(%[[VAL_12]]) fastmath : (!fir.ref>>) -> () + ! CHECK: hlfir.copy_out %[[VAL_0]], %[[VAL_11]]#1 : (!fir.ref>>>>, i1) -> () subroutine init_no_slice() type(c) :: y(2) = [ c(11, 21), c(12, 22) ] @@ -79,23 +65,16 @@ subroutine init_no_slice() print*,y%p end subroutine ! CHECK-LABEL: func.func private @_QFPinit_no_slice() - ! CHECK: %[[Y:.*]] = fir.address_of(@_QFFinit_no_sliceEy) : !fir.ref>> - ! CHECK: %[[C2:.*]] = arith.constant 2 : index - ! CHECK: %[[SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> - ! CHECK: %[[BOX:.*]] = fir.embox %[[Y]](%[[SHAPE]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %[[IS_CONTIGOUS:.*]] = fir.call @_FortranAIsContiguous(%[[BOX_NONE]]) {{.*}}: (!fir.box) -> i1 - ! CHECK: %[[TEMP:.*]] = fir.if %[[IS_CONTIGOUS]] -> (!fir.heap>>) { - ! CHECK: } else { - ! CHECK: fir.call @_FortranAAssign - ! CHECK: %[[TEMP_CAST:.*]] = fir.convert %[[TEMP]] : (!fir.heap>>) -> !fir.ref>> - ! CHECK: fir.call @_QFPprint_p(%[[TEMP_CAST]]) {{.*}}: (!fir.ref>>) -> () - - ! 
CHECK-LABEL: %{{.*}} = fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref - ! CHECK: %[[SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> - ! CHECK: %[[BOX:.*]] = fir.embox %[[Y]](%[[SHAPE]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %{{.*}} = fir.call @_FortranAioOutputDescriptor(%{{.*}}, %[[BOX_NONE]]) {{.*}}: (!fir.ref, !fir.box) -> i1 + ! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box>>> + ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFFinit_no_sliceEy) : !fir.ref,b:i32}>>> + ! CHECK: %[[VAL_2:.*]] = arith.constant 2 : index + ! CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1> + ! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_3]]) {uniq_name = "_QFFinit_no_sliceEy"} : (!fir.ref,b:i32}>>>, !fir.shape<1>) -> (!fir.ref,b:i32}>>>, !fir.ref,b:i32}>>>) + ! CHECK: %[[VAL_5:.*]] = hlfir.designate %[[VAL_4]]#0{"p"} shape %[[VAL_3]] : (!fir.ref,b:i32}>>>, !fir.shape<1>) -> !fir.box>> + ! CHECK: %[[VAL_6:.*]]:2 = hlfir.copy_in %[[VAL_5]] to %[[VAL_0]] : (!fir.box>>, !fir.ref>>>>) -> (!fir.box>>, i1) + ! CHECK: %[[VAL_7:.*]] = fir.box_addr %[[VAL_6]]#0 : (!fir.box>>) -> !fir.ref>> + ! CHECK: fir.call @_QFPprint_p(%[[VAL_7]]) fastmath : (!fir.ref>>) -> () + ! CHECK: hlfir.copy_out %[[VAL_0]], %[[VAL_6]]#1 : (!fir.ref>>>>, i1) -> () subroutine init_allocatable() type(c), allocatable :: y(:) @@ -107,31 +86,19 @@ subroutine init_allocatable() end subroutine ! CHECK-LABEL: func.func private @_QFPinit_allocatable() - ! CHECK: %[[ALLOC:.*]] = fir.alloca !fir.heap>> {uniq_name = "_QFFinit_allocatableEy.addr"} - ! CHECK: %[[LB0:.*]] = fir.alloca index {uniq_name = "_QFFinit_allocatableEy.lb0"} - ! CHECK: %[[EXT0:.*]] = fir.alloca index {uniq_name = "_QFFinit_allocatableEy.ext0"} - ! CHECK-COUNT-6: %{{.*}} = fir.field_index a, !fir.type<_QFTc{a:i32,b:i32}> - ! 
CHECK: %[[LOAD_LB0:.*]] = fir.load %[[LB0]] : !fir.ref - ! CHECK: %[[LOAD_EXT0:.*]] = fir.load %[[EXT0]] : !fir.ref - ! CHECK: %[[MEM:.*]] = fir.load %[[ALLOC]] : !fir.ref>>> - ! CHECK: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[LOAD_LB0]], %[[LOAD_EXT0]] : (index, index) -> !fir.shapeshift<1> - ! CHECK: %[[BOX:.*]] = fir.embox %[[MEM]](%[[SHAPE_SHIFT]]) : (!fir.heap>>, !fir.shapeshift<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %[[IS_CONTIGOUS:.*]] = fir.call @_FortranAIsContiguous(%[[BOX_NONE]]) {{.*}}: (!fir.box) -> i1 - ! CHECK: %[[TEMP:.*]] = fir.if %[[IS_CONTIGOUS]] -> (!fir.heap>>) { - ! CHECK: } else { - ! CHECK: fir.call @_FortranAAssign - ! CHECK: %[[TEMP_CAST:.*]] = fir.convert %[[TEMP]] : (!fir.heap>>) -> !fir.ref>> - ! CHECK: fir.call @_QFPprint_p(%[[TEMP_CAST]]) {{.*}}: (!fir.ref>>) -> () - - ! CHECK-LABEL: %{{.*}} = fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref - ! CHECK: %[[LOAD_LB0:.*]] = fir.load %[[LB0]] : !fir.ref - ! CHECK: %[[LOAD_EXT0:.*]] = fir.load %[[EXT0]] : !fir.ref - ! CHECK: %[[LOAD_ALLOC:.*]] = fir.load %[[ALLOC]] : !fir.ref>>> - ! CHECK: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[LOAD_LB0]], %[[LOAD_EXT0]] : (index, index) -> !fir.shapeshift<1> - ! CHECK: %[[BOX:.*]] = fir.embox %[[LOAD_ALLOC]](%[[SHAPE_SHIFT]]) : (!fir.heap>>, !fir.shapeshift<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %{{.*}} = fir.call @_FortranAioOutputDescriptor(%{{.*}}, %[[BOX_NONE]]) {{.*}}: (!fir.ref, !fir.box) -> i1 + ! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %{{.*}}_QFFinit_allocatableEy" + ! CHECK: hlfir.assign + ! CHECK: hlfir.assign + ! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref,b:i32}>>>>> + ! CHECK: %[[VAL_31:.*]] = arith.constant 0 : index + ! 
CHECK: %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_30]], %[[VAL_31]] : (!fir.box,b:i32}>>>>, index) -> (index, index, index) + ! CHECK: %[[VAL_33:.*]] = fir.shape %[[VAL_32]]#1 : (index) -> !fir.shape<1> + ! CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_30]]{"p"} shape %[[VAL_33]] : (!fir.box,b:i32}>>>>, !fir.shape<1>) -> !fir.box>> + ! CHECK: %[[VAL_35:.*]]:2 = hlfir.copy_in %[[VAL_34]] to %[[VAL_0:.*]] : (!fir.box>>, !fir.ref>>>>) -> (!fir.box>>, i1) + ! CHECK: %[[VAL_36:.*]] = fir.box_addr %[[VAL_35]]#0 : (!fir.box>>) -> !fir.ref>> + ! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (!fir.ref>>) -> !fir.ref>> + ! CHECK: fir.call @_QFPprint_p(%[[VAL_37]]) fastmath : (!fir.ref>>) -> () + ! CHECK: hlfir.copy_out %[[VAL_0]], %[[VAL_35]]#1 : (!fir.ref>>>>, i1) -> () subroutine init_scalar() type(c) :: s = c(11, 21) @@ -140,13 +107,10 @@ subroutine init_scalar() end subroutine ! CHECK-LABEL: func.func private @_QFPinit_scalar() - ! CHECK: %[[S:.*]] = fir.address_of(@_QFFinit_scalarEs) : !fir.ref> - ! CHECK: %[[CAST:.*]] = fir.convert %[[S]] : (!fir.ref>) -> !fir.ref> - ! CHECK: fir.call @_QFPprint_scalar(%[[CAST]]) {{.*}}: (!fir.ref>) -> () - - ! CHECK: %[[BOX:.*]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>) -> !fir.box - ! CHECK: %{{.*}} = fir.call @_FortranAioOutputDerivedType(%{{.*}}, %[[BOX_NONE]], %{{.*}}) {{.*}}: (!fir.ref, !fir.box, !fir.ref) -> i1 + ! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFFinit_scalarEs) : !fir.ref,b:i32}>> + ! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFFinit_scalarEs"} : (!fir.ref,b:i32}>>) -> (!fir.ref,b:i32}>>, !fir.ref,b:i32}>>) + ! CHECK: %[[VAL_2:.*]] = hlfir.designate %[[VAL_1]]#0{"p"} : (!fir.ref,b:i32}>>) -> !fir.ref> + ! CHECK: fir.call @_QFPprint_scalar(%[[VAL_2]]) fastmath : (!fir.ref>) -> () subroutine init_assumed(y) type(c) :: y(:) @@ -155,12 +119,11 @@ subroutine init_assumed(y) end subroutine ! 
CHECK-LABEL: func.func private @_QFPinit_assumed( - ! CHECK-SAME: %[[ARG0:.*]]: !fir.box> - ! CHECK: %[[BOX:.*]] = fir.rebox %[[ARG0]] : (!fir.box>>) -> !fir.box>> - - ! CHECK: %[[REBOX:.*]] = fir.rebox %[[ARG0]] : (!fir.box>>) -> !fir.box>> - ! CHECK: %[[REBOX_CAST:.*]] = fir.convert %[[REBOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %{{.*}} = fir.call @_FortranAioOutputDescriptor(%{{.*}}, %[[REBOX_CAST]]) {{.*}}: (!fir.ref, !fir.box) -> i1 + ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}"_QFFinit_assumedEy" + ! CHECK: %[[VAL_4:.*]] = arith.constant 0 : index + ! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_4]] : (!fir.box,b:i32}>>>, index) -> (index, index, index) + ! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]]#1 : (index) -> !fir.shape<1> + ! CHECK: %[[VAL_7:.*]] = hlfir.designate %[[VAL_3]]#0{"p"} shape %[[VAL_6]] : (!fir.box,b:i32}>>>, !fir.shape<1>) -> !fir.box>> subroutine init_existing_field() type(z) :: y(2) @@ -168,13 +131,9 @@ subroutine init_existing_field() end subroutine ! CHECK-LABEL: func.func private @_QFPinit_existing_field - ! CHECK: %[[C2:.*]] = arith.constant 2 : index - ! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<2x!fir.type<_QFTz{k:i32,c:!fir.type<_QFTc{a:i32,b:i32}>}>> {bindc_name = "y", uniq_name = "_QFFinit_existing_fieldEy"} - ! CHECK: %[[FIELD_C:.*]] = fir.field_index c, !fir.type<_QFTz{k:i32,c:!fir.type<_QFTc{a:i32,b:i32}>}> - ! CHECK: %[[SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> - ! CHECK: %[[C1:.*]] = arith.constant 1 : index - ! CHECK: %[[SLICE:.*]] = fir.slice %[[C1]], %[[C2]], %[[C1]] path %[[FIELD_C]] : (index, index, index, !fir.field) -> !fir.slice<1> - ! CHECK: %{{.*}} = fir.embox %[[ALLOCA]](%[[SHAPE]]) [%[[SLICE]]] : (!fir.ref}>>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box>> + ! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %{{.*}}"_QFFinit_existing_fieldEy" + ! CHECK: %[[VAL_5:.*]] = hlfir.designate %[[VAL_4]]#0{"c"} shape %[[VAL_3]] : (!fir.ref,b:i32}>}>>>, !fir.shape<1>) -> !fir.box,b:i32}>>> + ! 
CHECK: %[[VAL_6:.*]] = hlfir.designate %[[VAL_5]]{"p"} shape %[[VAL_3]] : (!fir.box,b:i32}>>>, !fir.shape<1>) -> !fir.box>> subroutine parent_comp_lhs() type(c) :: a @@ -183,15 +142,10 @@ subroutine parent_comp_lhs() a%p = B end subroutine -! CHECK-LABEL: func.func private @_QFPparent_comp_lhs() -! CHECK: %[[BOX:.*]] = fir.alloca !fir.box> -! CHECK: %[[A:.*]] = fir.alloca !fir.type<_QFTc{a:i32,b:i32}> {bindc_name = "a", uniq_name = "_QFFparent_comp_lhsEa"} -! CHECK: %[[B:.*]] = fir.alloca !fir.type<_QFTp{a:i32}> {bindc_name = "b", uniq_name = "_QFFparent_comp_lhsEb"} -! CHECK: %[[EMBOX_A:.*]] = fir.embox %[[A]] : (!fir.ref>) -> !fir.box> -! CHECK: %[[EMBOX_B:.*]] = fir.embox %[[B]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.store %[[EMBOX_A]] to %[[BOX]] : !fir.ref>> -! CHECK: %[[A_NONE:.*]] = fir.convert %[[BOX]] : (!fir.ref>>) -> !fir.ref> -! CHECK: %[[B_NONE:.*]] = fir.convert %[[EMBOX_B]] : (!fir.box>) -> !fir.box -! CHECK: fir.call @_FortranAAssign(%[[A_NONE]], %[[B_NONE]], %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, !fir.box, !fir.ref, i32) -> () +! CHECK-LABEL: func.func private @_QFPparent_comp_lhs +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %{{.*}}"_QFFparent_comp_lhsEa" +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}"_QFFparent_comp_lhsEb" +! CHECK: %[[VAL_4:.*]] = hlfir.designate %[[VAL_1]]#0{"p"} : (!fir.ref,b:i32}>>) -> !fir.ref> +! CHECK: hlfir.assign %[[VAL_3]]#0 to %[[VAL_4]] : !fir.ref>, !fir.ref> end diff --git a/flang/test/Lower/pointer-assignments.f90 b/flang/test/Lower/pointer-assignments.f90 index cdf9eac70f450..8f83bf7c4946e 100644 --- a/flang/test/Lower/pointer-assignments.f90 +++ b/flang/test/Lower/pointer-assignments.f90 @@ -76,7 +76,7 @@ subroutine test_pointer_component(temp, temp_ptr) end type mytype type(mytype) :: temp real, pointer :: temp_ptr(:) - ! CHECK: %[[ptr_addr:.*]] = fir.coordinate_of %[[temp]], %{{.*}} : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! 
CHECK: %[[ptr_addr:.*]] = fir.coordinate_of %[[temp]], ptr : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[ptr:.*]] = fir.load %[[ptr_addr]] : !fir.ref>>> ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[ptr]], %{{.*}} : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[shift:.*]] = fir.shift %[[dims]]#0 : (index) -> !fir.shift<1> diff --git a/flang/test/Lower/polymorphic-temp.f90 b/flang/test/Lower/polymorphic-temp.f90 index 5e2937e1f5f65..a9db9ba7b7902 100644 --- a/flang/test/Lower/polymorphic-temp.f90 +++ b/flang/test/Lower/polymorphic-temp.f90 @@ -197,11 +197,9 @@ subroutine test_merge_intrinsic(a, b) ! CHECK-LABEL: func.func @_QMpoly_tmpPtest_merge_intrinsic( ! CHECK-SAME: %[[ARG0:.*]]: !fir.class> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.class> {fir.bindc_name = "b"}) { -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMpoly_tmpTp1{a:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[ARG0]], %[[FIELD_A]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[ARG0]], a : (!fir.class>) -> !fir.ref ! CHECK: %[[LOAD_A1:.*]] = fir.load %[[COORD_A]] : !fir.ref -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMpoly_tmpTp1{a:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[ARG1]], %[[FIELD_A]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[ARG1]], a : (!fir.class>) -> !fir.ref ! CHECK: %[[LOAD_A2:.*]] = fir.load %[[COORD_A]] : !fir.ref ! CHECK: %[[CMPI:.*]] = arith.cmpi sgt, %[[LOAD_A1]], %[[LOAD_A2]] : i32 ! CHECK: %[[SELECT:.*]] = arith.select %[[CMPI]], %[[ARG0]], %[[ARG1]] : !fir.class> diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90 index a1872e225359f..10793d8a88f42 100644 --- a/flang/test/Lower/polymorphic.f90 +++ b/flang/test/Lower/polymorphic.f90 @@ -104,8 +104,7 @@ subroutine component_access(p) ! CHECK-LABEL: func.func @_QMpolymorphic_testPcomponent_access( ! CHECK-SAME: %[[P:.*]]: !fir.class> {fir.bindc_name = "p"}) { -! 
CHECK: %[[FIELD:.*]] = fir.field_index a, !fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD:.*]] = fir.coordinate_of %[[P]], %[[FIELD]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %[[COORD:.*]] = fir.coordinate_of %[[P]], a : (!fir.class>) -> !fir.ref ! CHECK: %[[LOAD:.*]] = fir.load %[[COORD]] : !fir.ref ! CHECK: %{{.*}} = fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -205,8 +204,7 @@ subroutine associate_up_pointer(r) ! CHECK-LABEL: func.func @_QMpolymorphic_testPassociate_up_pointer( ! CHECK-SAME: %[[ARG0:.*]]: !fir.class>>}>> {fir.bindc_name = "r"}) { ! CHECK: %[[P:.*]] = fir.alloca !fir.class>> {bindc_name = "p", uniq_name = "_QMpolymorphic_testFassociate_up_pointerEp"} -! CHECK: %[[FIELD_RP:.*]] = fir.field_index rp, !fir.type<_QMpolymorphic_testTr1{rp:!fir.box>>}> -! CHECK: %[[COORD_RP:.*]] = fir.coordinate_of %[[ARG0]], %[[FIELD_RP]] : (!fir.class>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[COORD_RP:.*]] = fir.coordinate_of %[[ARG0]], rp : (!fir.class>>}>>) -> !fir.ref>>> ! CHECK: %[[LOAD_RP:.*]] = fir.load %[[COORD_RP]] : !fir.ref>>> ! CHECK: %[[REBOX_RP:.*]] = fir.rebox %[[LOAD_RP]](%{{.*}}) : (!fir.box>>, !fir.shift<1>) -> !fir.box> ! CHECK: %[[CONV_P:.*]] = fir.convert %[[P]] : (!fir.ref>>>) -> !fir.ref> @@ -308,8 +306,7 @@ subroutine nullify_pointer_array(a) ! CHECK-LABEL: func.func @_QMpolymorphic_testPnullify_pointer_array( ! CHECK-SAME: %[[ARG0:.*]]: !fir.ref>>>}>> {fir.bindc_name = "a"}) { -! CHECK: %[[FIELD_P:.*]] = fir.field_index p, !fir.type<_QMpolymorphic_testTp3{p:!fir.class>>>}> -! CHECK: %[[COORD_P:.*]] = fir.coordinate_of %[[ARG0]], %[[FIELD_P]] : (!fir.ref>>>}>>, !fir.field) -> !fir.ref>>>}>>>>> +! CHECK: %[[COORD_P:.*]] = fir.coordinate_of %[[ARG0]], p : (!fir.ref>>>}>>) -> !fir.ref>>>}>>>>> ! CHECK: %[[TYPE_DESC:.*]] = fir.type_desc !fir.type<_QMpolymorphic_testTp3{p:!fir.class>>>}> ! CHECK: %[[CONV_P:.*]] = fir.convert %[[COORD_P]] : (!fir.ref>>>}>>>>>) -> !fir.ref> ! 
CHECK: %[[CONV_TDESC:.*]] = fir.convert %[[TYPE_DESC]] : (!fir.tdesc>>>}>>) -> !fir.ref @@ -524,12 +521,10 @@ subroutine internal ! CHECK: %[[POS_IN_TUPLE:.*]] = arith.constant 0 : i32 ! CHECK: %[[COORD_OF_CLASS:.*]] = fir.coordinate_of %[[TUPLE]], %[[POS_IN_TUPLE]] : (!fir.ref>>>, i32) -> !fir.ref>> ! CHECK: %[[CLASS:.*]] = fir.load %[[COORD_OF_CLASS]] : !fir.ref>> -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[CLASS]], %[[FIELD_A]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[CLASS]], a : (!fir.class>) -> !fir.ref ! CHECK: %[[A:.*]] = fir.load %[[COORD_A]] : !fir.ref ! CHECK: %{{.*}} = fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[A]]) {{.*}} : (!fir.ref, i32) -> i1 -! CHECK: %[[FIELD_B:.*]] = fir.field_index b, !fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD_B:.*]] = fir.coordinate_of %[[CLASS]], %[[FIELD_B]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_B:.*]] = fir.coordinate_of %[[CLASS]], b : (!fir.class>) -> !fir.ref ! CHECK: %[[B:.*]] = fir.load %[[COORD_B]] : !fir.ref ! CHECK: %{{.*}} = fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[B]]) {{.*}} : (!fir.ref, i32) -> i1 @@ -1156,8 +1151,7 @@ program test ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[ADDR_O]] : (!fir.ref}>>>>) -> !fir.ref> ! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[O:.*]] = fir.load %[[ADDR_O]] : !fir.ref}>>>> -! CHECK: %[[FIELD_INNER:.*]] = fir.field_index inner, !fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}> -! CHECK: %[[COORD_INNER:.*]] = fir.coordinate_of %[[O]], %[[FIELD_INNER]] : (!fir.box}>>>, !fir.field) -> !fir.ref> +! CHECK: %[[COORD_INNER:.*]] = fir.coordinate_of %[[O]], inner : (!fir.box}>>>) -> !fir.ref> ! 
CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg1 = %{{.*}}) -> (!fir.array<5x!fir.logical<4>>) { ! CHECK: %[[EMBOXED:.*]] = fir.embox %[[COORD_INNER]] : (!fir.ref>) -> !fir.class> ! CHECK: %{{.*}} = fir.call @_QMpolymorphic_testPlt(%{{.*}}, %[[EMBOXED]]) {{.*}} : (!fir.ref, !fir.class>) -> !fir.logical<4> diff --git a/flang/test/Lower/pre-fir-tree04.f90 b/flang/test/Lower/pre-fir-tree04.f90 index e5f8042458542..07077ff0473dd 100644 --- a/flang/test/Lower/pre-fir-tree04.f90 +++ b/flang/test/Lower/pre-fir-tree04.f90 @@ -5,6 +5,7 @@ ! CHECK: Subroutine test_coarray Subroutine test_coarray use iso_fortran_env, only: team_type, event_type, lock_type + save type(team_type) :: t type(event_type) :: done[*] type(lock_type) :: alock[*] diff --git a/flang/test/Lower/select-type.f90 b/flang/test/Lower/select-type.f90 index e4ff2fef0efd3..64dd639731ab1 100644 --- a/flang/test/Lower/select-type.f90 +++ b/flang/test/Lower/select-type.f90 @@ -63,8 +63,7 @@ subroutine select_type1(a) ! CHECK: ^[[CLASS_IS_P1_BLK]] ! CHECK: ^[[CLASS_IS_P2_BLK]] ! CHECK: %[[P2:.*]] = fir.convert %[[ARG0:.*]] : (!fir.class>) -> !fir.class> -! CHECK: %[[FIELD:.*]] = fir.field_index c, !fir.type<_QMselect_type_lower_testTp2{a:i32,b:i32,c:i32}> -! CHECK: %{{.*}} = fir.coordinate_of %[[P2]], %[[FIELD]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %{{.*}} = fir.coordinate_of %[[P2]], c : (!fir.class>) -> !fir.ref ! CHECK: ^[[DEFAULT_BLOCK]] ! CFG-LABEL: func.func @_QMselect_type_lower_testPselect_type1( @@ -663,21 +662,18 @@ subroutine select_type10(a) ! CHECK: ^bb{{.*}}: ! CHECK: %[[EXACT_BOX:.*]] = fir.convert %[[SELECTOR]] : (!fir.class>>) -> !fir.box>> ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMselect_type_lower_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[EXACT_BOX]], %[[FIELD_A]] : (!fir.box>>, !fir.field) -> !fir.ref +! 
CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[EXACT_BOX]], a : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[C1]] to %[[COORD_A]] : !fir.ref ! CHECK: cf.br ^bb{{.*}} ! CHECK: ^bb{{.*}}: ! CHECK: %[[EXACT_BOX:.*]] = fir.convert %[[SELECTOR]] : (!fir.class>>) -> !fir.box>> ! CHECK: %[[C3:.*]] = arith.constant 3 : i32 -! CHECK: %[[FIELD_C:.*]] = fir.field_index c, !fir.type<_QMselect_type_lower_testTp2{a:i32,b:i32,c:i32}> -! CHECK: %[[COORD_C:.*]] = fir.coordinate_of %[[EXACT_BOX]], %[[FIELD_C]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_C:.*]] = fir.coordinate_of %[[EXACT_BOX]], c : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[C3]] to %[[COORD_C]] : !fir.ref ! CHECK: cf.br ^bb{{.*}} ! CHECK: ^bb{{.*}} ! CHECK: %[[C5:.*]] = arith.constant 5 : i32 -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMselect_type_lower_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[SELECTOR]], %[[FIELD_A]] : (!fir.class>>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[SELECTOR]], a : (!fir.class>>) -> !fir.ref ! CHECK: fir.store %[[C5]] to %[[COORD_A]] : !fir.ref ! CHECK: cf.br ^bb{{.*}} @@ -699,15 +695,13 @@ subroutine select_type11(a) ! CHECK: ^bb{{.*}}: ! CHECK: %[[EXACT_BOX:.*]] = fir.convert %[[SELECTOR]] : (!fir.class>>) -> !fir.box>> ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMselect_type_lower_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[EXACT_BOX]], %[[FIELD_A]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[EXACT_BOX]], a : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[C1]] to %[[COORD_A]] : !fir.ref ! CHECK: cf.br ^bb{{.*}} ! CHECK: ^bb{{.*}}: ! CHECK: %[[EXACT_BOX:.*]] = fir.convert %[[SELECTOR]] : (!fir.class>>) -> !fir.box>> ! CHECK: %[[C3:.*]] = arith.constant 3 : i32 -! 
CHECK: %[[FIELD_C:.*]] = fir.field_index c, !fir.type<_QMselect_type_lower_testTp2{a:i32,b:i32,c:i32}> -! CHECK: %[[COORD_C:.*]] = fir.coordinate_of %[[EXACT_BOX]], %[[FIELD_C]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_C:.*]] = fir.coordinate_of %[[EXACT_BOX]], c : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[C3]] to %[[COORD_C]] : !fir.ref ! CHECK: cf.br ^bb{{.*}} diff --git a/flang/test/Lower/structure-constructors.f90 b/flang/test/Lower/structure-constructors.f90 index 86581ce51bf45..171c8eb631f6e 100644 --- a/flang/test/Lower/structure-constructors.f90 +++ b/flang/test/Lower/structure-constructors.f90 @@ -32,8 +32,7 @@ module m_struct_ctor subroutine test_simple(x) real :: x ! CHECK: %[[tmp:.*]] = fir.alloca !fir.type<_QMm_struct_ctorTt_simple{x:f32}> - ! CHECK: %[[field:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_simple{x:f32}> - ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], %[[field]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], x : (!fir.ref>) -> !fir.ref ! CHECK: %[[val:.*]] = fir.load %[[x]] : !fir.ref ! CHECK: fir.store %[[val]] to %[[xcoor]] : !fir.ref call print_simple(t_simple(x=x)) @@ -43,13 +42,11 @@ subroutine test_simple(x) ! CHECK-SAME: %[[x:.*]]: !fir.ref{{.*}}) subroutine test_char_scalar(x) ! CHECK: %[[tmp:.*]] = fir.alloca !fir.type<_QMm_struct_ctorTt_char_scalar{x:f32,c:!fir.char<1,3>}> - ! CHECK: %[[xfield:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_char_scalar{x:f32,c:!fir.char<1,3>}> - ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], %[[xfield]] : (!fir.ref}>>, !fir.field) -> !fir.ref + ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], x : (!fir.ref}>>) -> !fir.ref ! CHECK: %[[val:.*]] = fir.load %[[x]] : !fir.ref ! CHECK: fir.store %[[val]] to %[[xcoor]] : !fir.ref - ! CHECK: %[[cfield:.*]] = fir.field_index c, !fir.type<_QMm_struct_ctorTt_char_scalar{x:f32,c:!fir.char<1,3>}> - ! 
CHECK: %[[ccoor:.*]] = fir.coordinate_of %[[tmp]], %[[cfield]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[ccoor:.*]] = fir.coordinate_of %[[tmp]], c : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[cst:.*]] = fir.address_of(@_QQ{{.*}}) : !fir.ref> ! CHECK-DAG: %[[ccast:.*]] = fir.convert %[[ccoor]] : (!fir.ref>) -> !fir.ref ! CHECK-DAG: %[[cstcast:.*]] = fir.convert %[[cst]] : (!fir.ref>) -> !fir.ref @@ -65,13 +62,11 @@ subroutine test_simple_array(x, j) integer :: j(5) call print_simple_array(t_array(x=x, i=2*j)) ! CHECK: %[[tmp:.*]] = fir.alloca !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[xfield:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], %[[xfield]] : (!fir.ref}>>, !fir.field) -> !fir.ref + ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], x : (!fir.ref}>>) -> !fir.ref ! CHECK: %[[val:.*]] = fir.load %[[x]] : !fir.ref ! CHECK: fir.store %[[val]] to %[[xcoor]] : !fir.ref - ! CHECK: %[[ifield:.*]] = fir.field_index i, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[icoor:.*]] = fir.coordinate_of %[[tmp]], %[[ifield]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[icoor:.*]] = fir.coordinate_of %[[tmp]], i : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[iload:.*]] = fir.array_load %[[icoor]](%{{.*}}) : (!fir.ref>, !fir.shape<1>) -> !fir.array<5xi32> ! CHECK: %[[jload:.*]] = fir.array_load %[[j]](%{{.*}}) : (!fir.ref>, !fir.shape<1>) -> !fir.array<5xi32> ! CHECK: %[[loop:.*]] = fir.do_loop %[[idx:.*]] = %c0{{.*}} to %{{.*}} step %c1{{.*}} iter_args(%[[res:.*]] = %[[iload]]) -> (!fir.array<5xi32>) { @@ -90,12 +85,10 @@ subroutine test_char_array(x, c1) ! CHECK: %[[VAL_4:.*]]:2 = fir.unboxchar %[[VAL_1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]]#0 : (!fir.ref>) -> !fir.ref>> ! CHECK: %[[VAL_6:.*]] = arith.constant 5 : index - ! 
CHECK: %[[VAL_7:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_char_array{x:f32,c:!fir.array<5x!fir.char<1,3>>}> - ! CHECK: %[[VAL_8:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_7]] : (!fir.ref>}>>, !fir.field) -> !fir.ref + ! CHECK: %[[VAL_8:.*]] = fir.coordinate_of %[[VAL_3]], x : (!fir.ref>}>>) -> !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_0]] : !fir.ref ! CHECK: fir.store %[[VAL_9]] to %[[VAL_8]] : !fir.ref - ! CHECK: %[[VAL_10:.*]] = fir.field_index c, !fir.type<_QMm_struct_ctorTt_char_array{x:f32,c:!fir.array<5x!fir.char<1,3>>}> - ! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_10]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_3]], c : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[VAL_12:.*]] = arith.constant 5 : index ! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_14:.*]] = fir.array_load %[[VAL_11]](%[[VAL_13]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.array<5x!fir.char<1,3>> @@ -132,12 +125,10 @@ subroutine test_char_array(x, c1) ! CHECK-LABEL: func @_QMm_struct_ctorPtest_ptr( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref{{.*}}, %[[VAL_1:.*]]: !fir.box> {{{.*}}, fir.target}) { ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QMm_struct_ctorTt_ptr{x:f32,p:!fir.box>>}> - ! CHECK: %[[VAL_4:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_ptr{x:f32,p:!fir.box>>}> - ! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_4]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref + ! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], x : (!fir.ref>>}>>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref ! CHECK: fir.store %[[VAL_6]] to %[[VAL_5]] : !fir.ref - ! CHECK: %[[VAL_7:.*]] = fir.field_index p, !fir.type<_QMm_struct_ctorTt_ptr{x:f32,p:!fir.box>>}> - ! CHECK: %[[VAL_8:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_7]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! 
CHECK: %[[VAL_8:.*]] = fir.coordinate_of %[[VAL_3]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i64) -> index ! CHECK: %[[VAL_11:.*]] = arith.constant 2 : i64 @@ -170,22 +161,16 @@ subroutine test_nested(x, d) real :: x type(t_array) :: d ! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.type<_QMm_struct_ctorTt_nested{x:f32,dt:!fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}>}> - ! CHECK: %[[VAL_3:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_nested{x:f32,dt:!fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}>}> - ! CHECK: %[[VAL_4:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_3]] : (!fir.ref}>}>>, !fir.field) -> !fir.ref + ! CHECK: %[[VAL_4:.*]] = fir.coordinate_of %[[VAL_2]], x : (!fir.ref}>}>>) -> !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_0]] : !fir.ref ! CHECK: fir.store %[[VAL_5]] to %[[VAL_4]] : !fir.ref - ! CHECK: %[[VAL_6:.*]] = fir.field_index dt, !fir.type<_QMm_struct_ctorTt_nested{x:f32,dt:!fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}>}> - ! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_6]] : (!fir.ref}>}>>, !fir.field) -> !fir.ref}>> - ! CHECK: %[[VAL_8:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_8]] : (!fir.ref}>>, !fir.field) -> !fir.ref - ! CHECK: %[[VAL_8b:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_7]], %[[VAL_8b]] : (!fir.ref}>>, !fir.field) -> !fir.ref + ! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_2]], dt : (!fir.ref}>}>>) -> !fir.ref}>> + ! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_1]], x : (!fir.ref}>>) -> !fir.ref + ! CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_7]], x : (!fir.ref}>>) -> !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref ! 
CHECK: fir.store %[[VAL_11]] to %[[VAL_10]] : !fir.ref - ! CHECK: %[[VAL_12:.*]] = fir.field_index i, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[VAL_13:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_12]] : (!fir.ref}>>, !fir.field) -> !fir.ref> - ! CHECK: %[[VAL_12b:.*]] = fir.field_index i, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_7]], %[[VAL_12b]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[VAL_13:.*]] = fir.coordinate_of %[[VAL_1]], i : (!fir.ref}>>) -> !fir.ref> + ! CHECK: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_7]], i : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[VAL_15:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : index ! CHECK: %[[VAL_17:.*]] = arith.constant 4 : index @@ -238,12 +223,10 @@ subroutine print_nested(t) ! CHECK-LABEL: func.func @_QPtest_parent_component1() { ! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QFtest_parent_component1Tmid{x:i32,y:!fir.array<2xi32>,mask:!fir.logical<4>}> -! CHECK: %[[VAL_14:.*]] = fir.field_index x, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref}>>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], x : (!fir.ref}>>) -> !fir.ref ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: fir.store %[[VAL_16]] to %[[VAL_15]] : !fir.ref -! CHECK: %[[VAL_17:.*]] = fir.field_index y, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_17]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_0]], y : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[VAL_19:.*]] = arith.constant 2 : index ! 
CHECK: %[[VAL_20:.*]] = fir.shape %[[VAL_19]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_21:.*]] = fir.array_load %[[VAL_18]](%[[VAL_20]]) : (!fir.ref>, !fir.shape<1>) -> !fir.array<2xi32> @@ -260,16 +243,12 @@ subroutine print_nested(t) ! CHECK: } ! CHECK: fir.array_merge_store %[[VAL_21]], %[[VAL_34:.*]] to %[[VAL_18]] : !fir.array<2xi32>, !fir.array<2xi32>, !fir.ref> ! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_1]] : (!fir.ref,mask:!fir.logical<4>}>>) -> !fir.ref}>> -! CHECK: %[[VAL_36:.*]] = fir.field_index x, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_37:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_36]] : (!fir.ref}>>, !fir.field) -> !fir.ref -! CHECK: %[[VAL_38:.*]] = fir.field_index x, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_39:.*]] = fir.coordinate_of %[[VAL_35]], %[[VAL_38]] : (!fir.ref}>>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_37:.*]] = fir.coordinate_of %[[VAL_0]], x : (!fir.ref}>>) -> !fir.ref +! CHECK: %[[VAL_39:.*]] = fir.coordinate_of %[[VAL_35]], x : (!fir.ref}>>) -> !fir.ref ! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_37]] : !fir.ref ! CHECK: fir.store %[[VAL_40]] to %[[VAL_39]] : !fir.ref -! CHECK: %[[VAL_41:.*]] = fir.field_index y, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_42:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_41]] : (!fir.ref}>>, !fir.field) -> !fir.ref> -! CHECK: %[[VAL_43:.*]] = fir.field_index y, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_44:.*]] = fir.coordinate_of %[[VAL_35]], %[[VAL_43]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_42:.*]] = fir.coordinate_of %[[VAL_0]], y : (!fir.ref}>>) -> !fir.ref> +! CHECK: %[[VAL_44:.*]] = fir.coordinate_of %[[VAL_35]], y : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[VAL_45:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_46:.*]] = arith.constant 1 : index ! 
CHECK: %[[VAL_47:.*]] = arith.constant 1 : index @@ -279,8 +258,7 @@ subroutine print_nested(t) ! CHECK: %[[VAL_51:.*]] = fir.load %[[VAL_50]] : !fir.ref ! CHECK: fir.store %[[VAL_51]] to %[[VAL_49]] : !fir.ref ! CHECK: } -! CHECK: %[[VAL_52:.*]] = fir.field_index mask, !fir.type<_QFtest_parent_component1Tmid{x:i32,y:!fir.array<2xi32>,mask:!fir.logical<4>}> -! CHECK: %[[VAL_53:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_52]] : (!fir.ref,mask:!fir.logical<4>}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_53:.*]] = fir.coordinate_of %[[VAL_1]], mask : (!fir.ref,mask:!fir.logical<4>}>>) -> !fir.ref> ! CHECK: %[[VAL_54:.*]] = arith.constant true ! CHECK: %[[VAL_55:.*]] = fir.convert %[[VAL_54]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[VAL_55]] to %[[VAL_53]] : !fir.ref> @@ -303,10 +281,8 @@ subroutine test_parent_component1() ! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.type<_QFtest_parent_component2Tmid{z:!fir.char<1,5>,mask:!fir.logical<4>}> ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFtest_parent_component2Epv) : !fir.ref}>> ! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_0]] : (!fir.ref,mask:!fir.logical<4>}>>) -> !fir.ref}>> -! CHECK: %[[VAL_9:.*]] = fir.field_index z, !fir.type<_QFtest_parent_component2Tbase{z:!fir.char<1,5>}> -! CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_9]] : (!fir.ref}>>, !fir.field) -> !fir.ref> -! CHECK: %[[VAL_11:.*]] = fir.field_index z, !fir.type<_QFtest_parent_component2Tbase{z:!fir.char<1,5>}> -! CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_8]], %[[VAL_11]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_1]], z : (!fir.ref}>>) -> !fir.ref> +! CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_8]], z : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[VAL_13:.*]] = arith.constant 5 : index ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_13]] : (index) -> i64 @@ -315,8 +291,7 @@ subroutine test_parent_component1() ! 
CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_12]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_10]] : (!fir.ref>) -> !fir.ref ! CHECK: fir.call @llvm.memmove.p0.p0.i64(%[[VAL_18]], %[[VAL_19]], %[[VAL_16]], %[[VAL_17]]) {{.*}}: (!fir.ref, !fir.ref, i64, i1) -> () -! CHECK: %[[VAL_20:.*]] = fir.field_index mask, !fir.type<_QFtest_parent_component2Tmid{z:!fir.char<1,5>,mask:!fir.logical<4>}> -! CHECK: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_20]] : (!fir.ref,mask:!fir.logical<4>}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_0]], mask : (!fir.ref,mask:!fir.logical<4>}>>) -> !fir.ref> ! CHECK: %[[VAL_22:.*]] = arith.constant true ! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[VAL_23]] to %[[VAL_21]] : !fir.ref> @@ -347,8 +322,7 @@ subroutine test_parent_component2() ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>}>>>) -> !fir.ref> ! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_9]] : (!fir.box>}>>>) -> !fir.box ! CHECK: fir.call @_FortranAAssign(%[[VAL_14]], %[[VAL_15]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, !fir.box, !fir.ref, i32) -> () -! CHECK: %[[VAL_18:.*]] = fir.field_index mask, !fir.type<_QFtest_parent_component3Tmid{m:!fir.array<2x!fir.char<1,5>>,mask:!fir.logical<4>}> -! CHECK: %[[VAL_19:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_18]] : (!fir.ref>,mask:!fir.logical<4>}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_19:.*]] = fir.coordinate_of %[[VAL_2]], mask : (!fir.ref>,mask:!fir.logical<4>}>>) -> !fir.ref> ! CHECK: %[[VAL_20:.*]] = arith.constant true ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4> ! 
CHECK: fir.store %[[VAL_21]] to %[[VAL_19]] : !fir.ref> diff --git a/flang/test/Semantics/abstract02.f90 b/flang/test/Semantics/abstract02.f90 index 29aad7b03e537..22183e445d5c6 100644 --- a/flang/test/Semantics/abstract02.f90 +++ b/flang/test/Semantics/abstract02.f90 @@ -4,6 +4,12 @@ program test abstract interface subroutine abstract end subroutine + !ERROR: An ABSTRACT interface may not have the same name as an intrinsic type + function integer() + end + !ERROR: An ABSTRACT interface may not have the same name as an intrinsic type + subroutine logical + end end interface procedure(abstract), pointer :: p !ERROR: Abstract procedure interface 'abstract' may not be referenced diff --git a/flang/test/Semantics/allocate11.f90 b/flang/test/Semantics/allocate11.f90 index 6440248b6f4a9..1b7495e9fc07d 100644 --- a/flang/test/Semantics/allocate11.f90 +++ b/flang/test/Semantics/allocate11.f90 @@ -38,7 +38,14 @@ subroutine C937(var) type B type(A) y - !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'y%x') + !ERROR: Allocatable or array component 'forward' may not have a coarray ultimate component '%y%x' + type(B), allocatable :: forward + real :: u + end type + + type B2 + type(A) y + !ERROR: Pointer 'forward' may not have a coarray potential component '%y%x' type(B), pointer :: forward real :: u end type @@ -48,11 +55,14 @@ subroutine C937(var) end type type D - !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'x') - type(A), pointer :: potential + !ERROR: Allocatable or array component 'potential' may not have a coarray ultimate component '%x' + type(A), allocatable :: potential end type - + type D2 + !ERROR: Pointer 'potential' may not have a coarray potential component '%x' + type(A), pointer :: potential + end type class(*), allocatable :: var ! 
unlimited polymorphic is the ONLY way to get an allocatable/pointer 'var' that can be diff --git a/flang/test/Semantics/array-constr-len.f90 b/flang/test/Semantics/array-constr-len.f90 index 4de9c76c7041c..9b23026a16012 100644 --- a/flang/test/Semantics/array-constr-len.f90 +++ b/flang/test/Semantics/array-constr-len.f90 @@ -11,4 +11,8 @@ subroutine subr(s,n) print *, [(s(1:1),j=1,0)] ! ok print *, [character(2)::(s(1:n),j=1,0)] ! ok print *, [character(n)::(s(1:n),j=1,0)] + !ERROR: A length specifier of '*' or ':' may not appear in the type of an array constructor + print *, [ character(:) :: ] + !ERROR: A length specifier of '*' or ':' may not appear in the type of an array constructor + print *, [ character(*) :: ] end diff --git a/flang/test/Semantics/assign02.f90 b/flang/test/Semantics/assign02.f90 index 707d5ed3cfaa5..a40d204982b2f 100644 --- a/flang/test/Semantics/assign02.f90 +++ b/flang/test/Semantics/assign02.f90 @@ -74,8 +74,8 @@ subroutine s4(x) ! C1020 subroutine s5 - real, target :: x[*] - real, target, volatile :: y[*] + real, target, save :: x[*] + real, target, volatile, save :: y[*] real, pointer :: p real, pointer, volatile :: q p => x @@ -148,7 +148,7 @@ function f2() ! C1026 (R1037) A data-target shall not be a coindexed object. 
subroutine s10 - real, target :: a[*] + real, target, save :: a[*] real, pointer :: b !ERROR: A coindexed object may not be a pointer target b => a[1] diff --git a/flang/test/Semantics/associated.f90 b/flang/test/Semantics/associated.f90 index 1432744806599..c814980377b9f 100644 --- a/flang/test/Semantics/associated.f90 +++ b/flang/test/Semantics/associated.f90 @@ -90,7 +90,7 @@ subroutine test(assumedRank) type(t2) :: t2x type(t2), target :: t2xtarget integer, target :: targetIntArr(2) - integer, target :: targetIntCoarray[*] + integer, target, save :: targetIntCoarray[*] integer, pointer :: intPointerArr(:) procedure(objPtrFunc), pointer :: objPtrFuncPointer diff --git a/flang/test/Semantics/bind-c09.f90 b/flang/test/Semantics/bind-c09.f90 index 953f2d751234f..e08e4f001c696 100644 --- a/flang/test/Semantics/bind-c09.f90 +++ b/flang/test/Semantics/bind-c09.f90 @@ -44,6 +44,6 @@ function func8() result(res) bind(c) end function func9() result(res) bind(c) - ! ERROR: Interoperable function result may not be a coarray + ! ERROR: Function result may not be a coarray integer :: res[10, *] end diff --git a/flang/test/Semantics/bug125774.f90 b/flang/test/Semantics/bug125774.f90 new file mode 100644 index 0000000000000..9844f1ec5eb1e --- /dev/null +++ b/flang/test/Semantics/bug125774.f90 @@ -0,0 +1,15 @@ +! 
RUN: %python %S/test_errors.py %s %flang_fc1 +type t +end type +real, pointer :: rptr +type(t), pointer :: tptr +class(*), pointer :: ulpp +print *, associated(rptr, ulpp) +print *, associated(ulpp, rptr) +print *, associated(tptr, ulpp) +print *, associated(ulpp, tptr) +!ERROR: Arguments of ASSOCIATED() must be a pointer and an optional valid target +print *, associated(rptr, tptr) +!ERROR: Arguments of ASSOCIATED() must be a pointer and an optional valid target +print *, associated(tptr, rptr) +end diff --git a/flang/test/Semantics/call09.f90 b/flang/test/Semantics/call09.f90 index b8583ba4a4907..58b2382f600ef 100644 --- a/flang/test/Semantics/call09.f90 +++ b/flang/test/Semantics/call09.f90 @@ -82,27 +82,26 @@ subroutine test1 ! 15.5.2.9(5) call s01(null(intPtr)) !ERROR: Actual argument associated with procedure dummy argument 'p=' is typeless call s01(B"0101") - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) + !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' is not a procedure pointer call s02(realfunc) call s02(p) ! ok !ERROR: Actual procedure argument has interface incompatible with dummy argument 'p=': function results have distinct types: REAL(4) vs INTEGER(4) call s02(ip) - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) - call s02(procptr()) + call s02(procptr()) ! believed to be ok call s02(null()) ! 
ok - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) + !ERROR: Actual argument associated with INTENT(IN OUT) procedure pointer dummy argument 'p=' is not definable + !BECAUSE: 'NULL()' is a null pointer call s05(null()) - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) + !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' is not a procedure pointer call s02(sin) - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) + !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' is not a procedure pointer call s02b(realfunc) call s02b(p) ! ok !ERROR: Actual argument function associated with procedure dummy argument 'p=' is not compatible: function results have distinct types: REAL(4) vs INTEGER(4) call s02b(ip) - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) - call s02b(procptr()) + call s02b(procptr()) ! believed to be ok call s02b(null()) - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) + !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' is not a procedure pointer call s02b(sin) end subroutine diff --git a/flang/test/Semantics/call10.f90 b/flang/test/Semantics/call10.f90 index 2d2f57934cd8a..81c28082a843f 100644 --- a/flang/test/Semantics/call10.f90 +++ b/flang/test/Semantics/call10.f90 @@ -200,8 +200,9 @@ pure subroutine s13 !ERROR: An image control statement may not appear in a pure subprogram sync all ! C1599 end subroutine - pure subroutine s14 - integer :: img, nimgs, i[*], tmp + pure subroutine s14(i) + integer :: img, nimgs, tmp + integer, intent(in out) :: i[*] ! 
implicit sync all img = this_image() nimgs = num_images() diff --git a/flang/test/Semantics/call12.f90 b/flang/test/Semantics/call12.f90 index 2e5591ad927da..cd4006a53b3e7 100644 --- a/flang/test/Semantics/call12.f90 +++ b/flang/test/Semantics/call12.f90 @@ -40,7 +40,9 @@ pure function test(ptr, in, hpd, hhpd) type(hasHiddenPtr), intent(in) :: hhpd type(hasPtr), allocatable :: alloc type(hasHiddenPtr), allocatable :: hpAlloc + !ERROR: Pointer 'hcp' may not have a coarray potential component '%co' type(hasCoarray), pointer :: hcp + type(hasCoarray), allocatable :: hca integer :: n common /block/ y external :: extfunc @@ -60,8 +62,8 @@ pure function test(ptr, in, hpd, hhpd) !BECAUSE: 'in' is an INTENT(IN) dummy argument in%a = 0. ! C1594(1) !ERROR: Left-hand side of assignment is not definable - !BECAUSE: A pure subprogram may not define the coindexed object 'hcp%co[1_8]' - hcp%co[1] = 0. ! C1594(1) + !BECAUSE: A pure subprogram may not define the coindexed object 'hca%co[1_8]' + hca%co[1] = 0. ! C1594(1) !ERROR: The left-hand side of a pointer assignment is not definable !BECAUSE: 'ptr' may not be defined in pure subprogram 'test' because it is a POINTER dummy argument of a pure function ptr => z ! 
C1594(2) diff --git a/flang/test/Semantics/call24.f90 b/flang/test/Semantics/call24.f90 index 78ee17b488676..c1053db93648f 100644 --- a/flang/test/Semantics/call24.f90 +++ b/flang/test/Semantics/call24.f90 @@ -39,7 +39,7 @@ subroutine test() !ERROR: References to the procedure 'bar' require an explicit interface !BECAUSE: a dummy procedure is optional or a pointer !WARNING: If the procedure's interface were explicit, this reference would be in error - !BECAUSE: Actual argument associated with procedure pointer dummy argument 'a_pointer=' must be a pointer unless INTENT(IN) + !BECAUSE: Actual argument associated with procedure pointer dummy argument 'a_pointer=' is not a procedure pointer call bar(sin) !ERROR: References to the procedure 'baz' require an explicit interface diff --git a/flang/test/Semantics/call27.f90 b/flang/test/Semantics/call27.f90 index 062df6e45da89..135d6c06dcb4a 100644 --- a/flang/test/Semantics/call27.f90 +++ b/flang/test/Semantics/call27.f90 @@ -1,12 +1,26 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic ! Catch NULL() actual argument association with allocatable dummy argument program test - !ERROR: NULL() actual argument 'NULL()' may not be associated with allocatable dummy argument 'a=' without INTENT(IN) + real, allocatable :: a + !ERROR: NULL() actual argument 'NULL()' may not be associated with allocatable dummy argument dummy argument 'a=' that is INTENT(OUT) or INTENT(IN OUT) + call foo0(null()) + !WARNING: NULL() actual argument 'NULL()' should not be associated with allocatable dummy argument dummy argument 'a=' without INTENT(IN) call foo1(null()) !PORTABILITY: Allocatable dummy argument 'a=' is associated with NULL() call foo2(null()) call foo3(null()) ! 
ok + !ERROR: Actual argument associated with INTENT(IN OUT) dummy argument 'a=' is not definable + !BECAUSE: 'null(mold=a)' is a null pointer + call foo0(null(mold=a)) + !WARNING: A null pointer should not be associated with allocatable dummy argument 'a=' without INTENT(IN) + call foo1(null(mold=a)) + !PORTABILITY: Allocatable dummy argument 'a=' is associated with a null pointer + call foo2(null(mold=a)) + call foo3(null(mold=a)) ! ok contains + subroutine foo0(a) + real, allocatable, intent(in out) :: a + end subroutine subroutine foo1(a) real, allocatable :: a end subroutine diff --git a/flang/test/Semantics/change_team01.f90 b/flang/test/Semantics/change_team01.f90 index 43be1c10fb842..a5e53e98fc986 100644 --- a/flang/test/Semantics/change_team01.f90 +++ b/flang/test/Semantics/change_team01.f90 @@ -4,6 +4,7 @@ subroutine test use, intrinsic :: iso_fortran_env, only: team_type + save type(team_type) :: team integer, codimension[*] :: selector integer, codimension[2,*] :: selector2d diff --git a/flang/test/Semantics/coarrays01.f90 b/flang/test/Semantics/coarrays01.f90 index 0a6f88a7e748c..0dfcd1a41c95d 100644 --- a/flang/test/Semantics/coarrays01.f90 +++ b/flang/test/Semantics/coarrays01.f90 @@ -2,7 +2,7 @@ ! Test selector and team-value in CHANGE TEAM statement ! OK -subroutine s1 +subroutine s1(y) use iso_fortran_env, only: team_type type(team_type) :: t real :: y[10,*] @@ -11,7 +11,7 @@ subroutine s1 form team(1, t) end -subroutine s2 +subroutine s2(y,y2,x) use iso_fortran_env type(team_type) :: t real :: y[10,*], y2[*], x[*] @@ -27,7 +27,7 @@ subroutine s2 end team end -subroutine s3 +subroutine s3(y) type :: team_type end type type :: foo diff --git a/flang/test/Semantics/coarrays02.f90 b/flang/test/Semantics/coarrays02.f90 new file mode 100644 index 0000000000000..e52f3e3ef3a40 --- /dev/null +++ b/flang/test/Semantics/coarrays02.f90 @@ -0,0 +1,50 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! More coarray error tests. 
+module m + integer :: local[*] ! ok in module +end +program main + use iso_fortran_env + !ERROR: Coarray 'namedconst' may not be a named constant + !ERROR: Local coarray must have the SAVE attribute + integer, parameter :: namedConst = 123 + codimension namedConst[*] + !ERROR: Coarray 'coarr1' may not be in COMMON block '//' + real :: coarr1[*] + common//coarr1 + !ERROR: Variable 'event' with EVENT_TYPE or LOCK_TYPE must be a coarray + type(event_type) event + !ERROR: Variable 'lock' with EVENT_TYPE or LOCK_TYPE must be a coarray + type(lock_type) lock + integer :: local[*] ! ok in main +end + +function func1() + !ERROR: Function result may not be a coarray + integer :: func1[*] + !ERROR: Local coarray must have the SAVE attribute + integer :: local[*] + integer, save :: saved[*] ! ok + integer :: inited[*] = 1 ! ok + func = 1 +end + +function func2() + type t + real, allocatable :: comp[:] + end type + type t2 + !ERROR: Allocatable or array component 'allo' may not have a coarray ultimate component '%comp' + type(t), allocatable :: allo + !ERROR: Allocatable or array component 'arr' may not have a coarray ultimate component '%comp' + type(t) :: arr(1) + end type + !ERROR: Function result 'func2' may not have a coarray potential component '%comp' + type(t) func2 + !ERROR: Pointer 'ptr' may not have a coarray potential component '%comp' + type(t), pointer :: ptr + !ERROR: Coarray 'coarr' may not have a coarray potential component '%comp' + type(t), save :: coarr[*] + !ERROR: Local variable 'local' without the SAVE attribute may not have a coarray potential subobject component '%comp' + type(t) :: local +end diff --git a/flang/test/Semantics/coshape.f90 b/flang/test/Semantics/coshape.f90 index 476000b56411c..d4fb45df6600c 100644 --- a/flang/test/Semantics/coshape.f90 +++ b/flang/test/Semantics/coshape.f90 @@ -1,5 +1,4 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 -! XFAIL: * ! Check for semantic errors in coshape() function, ! 
as defined in section 16.9.55 of the Fortran ! 2018 standard @@ -8,18 +7,21 @@ program coshape_tests use iso_c_binding, only : c_int32_t, c_int64_t implicit none + type t + real x + end type integer array(1), non_coarray(1), scalar_coarray[*], array_coarray(1)[*], non_constant, scalar_result real real_coarray[*] complex complex_coarray[*] character char_array(1) logical non_integer, logical_coarray[*] + type(t) derived_scalar_coarray[*], derived_array_coarray(1)[*] integer, allocatable :: codimensions(:) !___ standard-conforming statement with no optional arguments present ___ codimensions = coshape(scalar_coarray) codimensions = coshape(array_coarray) codimensions = coshape(array_coarray(1)) - codimensions = coshape(scalar_coarray[1]) codimensions = coshape(real_coarray) codimensions = coshape(logical_coarray) codimensions = coshape(complex_coarray) @@ -33,54 +35,79 @@ program coshape_tests !___ non-conforming statements ___ ! coarray argument must be a coarray + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' codimensions = coshape(non_coarray) + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' + codimensions = coshape(derived_scalar_coarray[1]%x) + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' + codimensions = coshape(derived_array_coarray[1]%x) + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' + codimensions = coshape(array_coarray[1]) + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' + codimensions = coshape(scalar_coarray[1]) ! kind argument must be an integer + !ERROR: Actual argument for 'kind=' has bad type 'LOGICAL(4)' codimensions = coshape(scalar_coarray, non_integer) ! kind argument must be a constant expression + !ERROR: 'kind=' argument must be a constant scalar integer whose value is a supported kind for the intrinsic result type codimensions = coshape(real_coarray, non_constant) ! 
kind argument must be an integer scalar + !ERROR: 'kind=' argument has unacceptable rank 1 codimensions = coshape(complex_coarray, array) ! missing all arguments + !ERROR: missing mandatory 'coarray=' argument codimensions = coshape() ! missing mandatory argument + !ERROR: missing mandatory 'coarray=' argument codimensions = coshape(kind=c_int32_t) ! incorrect typing for mandatory argument + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' codimensions = coshape(3.4) ! incorrect typing for coarray argument + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' codimensions = coshape(coarray=3.4) ! too many arguments + !ERROR: too many actual arguments for intrinsic 'coshape' codimensions = coshape(scalar_coarray, c_int32_t, 0) ! incorrect typing with correct keyword for coarray argument + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' codimensions = coshape(coarray=non_coarray) ! correct typing with incorrect keyword for coarray argument + !ERROR: unknown keyword argument to intrinsic 'coshape' codimensions = coshape(c=real_coarray) ! incorrect typing with correct keyword for kind argument + !ERROR: Actual argument for 'kind=' has bad type 'LOGICAL(4)' codimensions = coshape(complex_coarray, kind=non_integer) ! correct typing with incorrect keyword for kind argument + !ERROR: unknown keyword argument to intrinsic 'coshape' codimensions = coshape(logical_coarray, kinds=c_int32_t) ! repeated keyword for coarray argument + !ERROR: repeated keyword argument to intrinsic 'coshape' codimensions = coshape(coarray=scalar_coarray, coarray=real_coarray) ! repeated keyword for kind argument + !ERROR: repeated keyword argument to intrinsic 'coshape' codimensions = coshape(real_coarray, kind=c_int32_t, kind=c_int64_t) ! result must be a rank 1 array + !ERROR: No intrinsic or user-defined ASSIGNMENT(=) matches scalar INTEGER(4) and rank 1 array of INTEGER(4) scalar_result = coshape(scalar_coarray) ! 
result must be an integer array + !ERROR: No intrinsic or user-defined ASSIGNMENT(=) matches operand types CHARACTER(KIND=1) and INTEGER(4) char_array = coshape(real_coarray) end program coshape_tests diff --git a/flang/test/Semantics/critical02.f90 b/flang/test/Semantics/critical02.f90 index 692b06b025861..9c957d1e859c5 100644 --- a/flang/test/Semantics/critical02.f90 +++ b/flang/test/Semantics/critical02.f90 @@ -61,7 +61,7 @@ end subroutine test6 subroutine test7() use iso_fortran_env - type(event_type) :: x[*], y[*] + type(event_type), save :: x[*], y[*] critical !ERROR: An image control statement is not allowed in a CRITICAL construct event post (x) diff --git a/flang/test/Semantics/definable01.f90 b/flang/test/Semantics/definable01.f90 index d3b31ee38b2a3..5af7e954e4171 100644 --- a/flang/test/Semantics/definable01.f90 +++ b/flang/test/Semantics/definable01.f90 @@ -77,7 +77,8 @@ subroutine test3(objp, procp) !CHECK: error: Actual argument associated with INTENT(IN OUT) dummy argument 'op=' is not definable !CHECK: because: 'objp' is an INTENT(IN) dummy argument call test3a(objp) - !CHECK: error: Actual argument associated with procedure pointer dummy argument 'pp=' may not be INTENT(IN) + !CHECK: error: Actual argument associated with INTENT(IN OUT) procedure pointer dummy argument 'pp=' is not definable + !CHECK: because: 'procp' is an INTENT(IN) dummy argument call test3b(procp) end subroutine subroutine test3a(op) diff --git a/flang/test/Semantics/doconcurrent01.f90 b/flang/test/Semantics/doconcurrent01.f90 index 9d2c9e1ab3115..ab14d970b8501 100644 --- a/flang/test/Semantics/doconcurrent01.f90 +++ b/flang/test/Semantics/doconcurrent01.f90 @@ -69,7 +69,7 @@ end subroutine do_concurrent_test2 subroutine s1() use iso_fortran_env - type(event_type) :: x[*] + type(event_type), save :: x[*] do concurrent (i = 1:n) !ERROR: An image control statement is not allowed in DO CONCURRENT event post (x) @@ -78,7 +78,7 @@ end subroutine s1 subroutine s2() use 
iso_fortran_env - type(event_type) :: x[*] + type(event_type), save :: x[*] do concurrent (i = 1:n) !ERROR: An image control statement is not allowed in DO CONCURRENT event wait (x) @@ -124,8 +124,7 @@ subroutine s6() type(type0) :: type1_field end type - type(type1) :: pvar; - type(type1) :: qvar; + type(type1), save :: pvar, qvar integer, allocatable, dimension(:) :: array1 integer, allocatable, dimension(:) :: array2 integer, allocatable, codimension[:] :: ca, cb diff --git a/flang/test/Semantics/doconcurrent08.f90 b/flang/test/Semantics/doconcurrent08.f90 index 52b382741d073..48d653fc65896 100644 --- a/flang/test/Semantics/doconcurrent08.f90 +++ b/flang/test/Semantics/doconcurrent08.f90 @@ -85,13 +85,13 @@ subroutine s1() type(HasAllocPolyType) :: nonAllocatableWithAllocPoly ! OK because the declared variable is not allocatable - type(HasAllocPolyCoarrayType) :: nonAllocatableWithAllocPolyCoarray + type(HasAllocPolyCoarrayType), save :: nonAllocatableWithAllocPolyCoarray ! Bad because even though the declared the allocatable component is a coarray type(HasAllocPolyCoarrayType), allocatable :: allocWithAllocPolyCoarray ! OK since it has no polymorphic component - type(HasAllocCoarrayType) :: nonAllocWithAllocCoarray + type(HasAllocCoarrayType), save :: nonAllocWithAllocCoarray ! 
OK since it has no component that's polymorphic, oops type(HasPointerPolyType), allocatable :: allocatableWithPointerPoly @@ -125,6 +125,8 @@ subroutine s2() class(Base), allocatable, codimension[:] :: allocPolyComponentVar class(Base), allocatable, codimension[:] :: allocPolyComponentVar1 + class(*), allocatable :: unlimitedPoly + allocate(ChildType :: localVar) allocate(ChildType :: localVar1) allocate(Base :: localVar2) @@ -162,6 +164,16 @@ subroutine s2() !ERROR: Deallocation of a polymorphic entity caused by assignment not allowed in DO CONCURRENT allocPolyCoarray = allocPolyCoarray1 +!ERROR: Deallocation of a polymorphic entity caused by assignment not allowed in DO CONCURRENT + unlimitedPoly = 1 + select type (unlimitedPoly) + type is (integer) + unlimitedPoly = 1 ! ok + class default +!ERROR: Deallocation of a polymorphic entity caused by assignment not allowed in DO CONCURRENT + unlimitedPoly = 1 + end select + end do end subroutine s2 diff --git a/flang/test/Semantics/form_team01.f90 b/flang/test/Semantics/form_team01.f90 index 3b82e5b41666e..1510a8bb98f74 100644 --- a/flang/test/Semantics/form_team01.f90 +++ b/flang/test/Semantics/form_team01.f90 @@ -8,8 +8,7 @@ subroutine test integer :: team_index integer :: statvar character(len=50) :: errvar - integer, codimension[*] :: co_team_number - integer, codimension[*] :: co_team_index + integer, codimension[*], save :: co_team_number, co_team_index type(team_type), dimension(1) :: array_team integer, dimension(1) :: array_team_number integer, dimension(1) :: array_team_index diff --git a/flang/test/Semantics/generic07.f90 b/flang/test/Semantics/generic07.f90 index e7486c02a7d2b..5566c0f82633d 100644 --- a/flang/test/Semantics/generic07.f90 +++ b/flang/test/Semantics/generic07.f90 @@ -74,7 +74,7 @@ program test interface distinguishable3 procedure :: s1a, s1b end interface - !ERROR: Generic 'indistinguishable' may not have specific procedures 's2b' and 's2a' as their interfaces are not distinguishable + 
!ERROR: Generic 'indistinguishable' may not have specific procedures 's2a' and 's2b' as their interfaces are not distinguishable interface indistinguishable procedure :: s2a, s2b end interface diff --git a/flang/test/Semantics/init01.f90 b/flang/test/Semantics/init01.f90 index 65d524b16a23a..a1313e7c234d5 100644 --- a/flang/test/Semantics/init01.f90 +++ b/flang/test/Semantics/init01.f90 @@ -18,6 +18,7 @@ subroutine objectpointers(j) end type type(t1), target, save :: o1 type(t1), save :: o2 +!ERROR: Local variable 'o3' without the SAVE attribute may not have a coarray potential subobject component '%c2' type(t1), target :: o3 !ERROR: An initial data target may not be a reference to an ALLOCATABLE 'x1' real, pointer :: p1 => x1 diff --git a/flang/test/Semantics/io11.f90 b/flang/test/Semantics/io11.f90 index 9b5ad1b8427d9..7565d35aeb407 100644 --- a/flang/test/Semantics/io11.f90 +++ b/flang/test/Semantics/io11.f90 @@ -355,7 +355,7 @@ subroutine formattedReadProc(dtv, unit, iotype, vlist, iostat, iomsg) class(t), intent(inout) :: dtv integer, intent(in) :: unit character(len=*), intent(in) :: iotype - !ERROR: Dummy argument 'vlist' of a defined input/output procedure must be deferred shape + !ERROR: Dummy argument 'vlist' of a defined input/output procedure must be assumed shape integer, intent(in) :: vlist(5) integer, intent(out) :: iostat character(len=*), intent(inout) :: iomsg @@ -689,3 +689,58 @@ module m26b procedure unformattedRead end interface end + +module m27a + type t + integer c + contains + procedure ur1 + generic, private :: read(unformatted) => ur1 + end type + contains + subroutine ur1(dtv,unit,iostat,iomsg) + class(t),intent(inout) :: dtv + integer,intent(in) :: unit + integer,intent(out) :: iostat + character(*),intent(inout) :: iomsg + read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c + end +end +module m27b + use m27a + interface read(unformatted) + module procedure ur2 ! 
ok, t's generic is inaccessible + end interface + contains + subroutine ur2(dtv,unit,iostat,iomsg) + class(t),intent(inout) :: dtv + integer,intent(in) :: unit + integer,intent(out) :: iostat + character(*),intent(inout) :: iomsg + read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c + end +end + +module m28 + type t + contains + procedure, private :: write1 + generic :: write(formatted) => write1 + end type + abstract interface + subroutine absWrite(dtv, unit, iotype, v_list, iostat, iomsg) + import t + class(t), intent(in) :: dtv + integer, intent(in) :: unit + character(*), intent(in) :: iotype + integer, intent(in) :: v_list(:) + integer, intent(out) :: iostat + character(*), intent(inout) :: iomsg + end + end interface + !ERROR: Derived type 't' has conflicting type-bound input/output procedure 'write(formatted)' + procedure(absWrite) write1, write2 + interface write(formatted) + procedure write2 + end interface +end diff --git a/flang/test/Semantics/resolve07.f90 b/flang/test/Semantics/resolve07.f90 index 481094a51335f..a280769ac2525 100644 --- a/flang/test/Semantics/resolve07.f90 +++ b/flang/test/Semantics/resolve07.f90 @@ -18,6 +18,7 @@ subroutine s2 end subroutine s3 + save dimension :: x(4), x2(8) !ERROR: The dimensions of 'x' have already been declared allocatable :: x(:) diff --git a/flang/test/Semantics/resolve117.f90 b/flang/test/Semantics/resolve117.f90 index 3e3a813c0921b..b7b0ce7db6b0e 100644 --- a/flang/test/Semantics/resolve117.f90 +++ b/flang/test/Semantics/resolve117.f90 @@ -5,23 +5,28 @@ module m integer, kind :: k = 4 real x contains - procedure, nopass :: tbp => sub - generic :: gen => tbp + procedure, nopass :: tbp => sub1 + generic :: gen1 => tbp + generic :: gen2 => tbp end type type, extends(base1) :: ext1 contains - procedure, nopass :: sub + procedure, nopass :: sub1, sub2 !ERROR: Type parameter, component, or procedure binding 'base1' already defined in this type - generic :: base1 => sub + generic :: base1 => sub1 !ERROR: Type bound 
generic procedure 'k' may not have the same name as a non-generic symbol inherited from an ancestor type - generic :: k => sub + generic :: k => sub1 !ERROR: Type bound generic procedure 'x' may not have the same name as a non-generic symbol inherited from an ancestor type - generic :: x => sub + generic :: x => sub1 !ERROR: Type bound generic procedure 'tbp' may not have the same name as a non-generic symbol inherited from an ancestor type - generic :: tbp => sub - generic :: gen => sub ! ok + generic :: tbp => sub1 + generic :: gen1 => sub1 ! ok + !ERROR: Generic 'gen2' may not have specific procedures 'tbp' and 'sub2' as their interfaces are not distinguishable + generic :: gen2 => sub2 end type contains - subroutine sub + subroutine sub1 + end + subroutine sub2 end end diff --git a/flang/test/Semantics/resolve34.f90 b/flang/test/Semantics/resolve34.f90 index 4ddb8fd0b0eb8..39709a362b363 100644 --- a/flang/test/Semantics/resolve34.f90 +++ b/flang/test/Semantics/resolve34.f90 @@ -45,6 +45,11 @@ module m4 type, extends(t1) :: t2 end type end +module m4a + use m4 + type, extends(t1) :: t3 ! ok, inaccessible component + end type +end module m5 type :: t1 diff --git a/flang/test/Semantics/resolve50.f90 b/flang/test/Semantics/resolve50.f90 index cc4dc030a9905..5650fff32e16a 100644 --- a/flang/test/Semantics/resolve50.f90 +++ b/flang/test/Semantics/resolve50.f90 @@ -3,6 +3,7 @@ subroutine s1 use iso_fortran_env + save type(team_type) :: t complex :: x[*] real :: y[*] @@ -22,7 +23,7 @@ subroutine s1 subroutine s2 use iso_fortran_env type(team_type) :: t - real :: y[10,*], y2[*], x[*] + real, save :: y[10,*], y2[*], x[*] ! 
C1113 !ERROR: The codimensions of 'x' have already been declared change team(t, x[10,*] => y, x[*] => y2) diff --git a/flang/test/Semantics/resolve55.f90 b/flang/test/Semantics/resolve55.f90 index 0a40a19435748..5f7a3044e834c 100644 --- a/flang/test/Semantics/resolve55.f90 +++ b/flang/test/Semantics/resolve55.f90 @@ -81,7 +81,7 @@ end subroutine s6 subroutine s7() ! Cannot have a coarray - integer, codimension[*] :: coarray_var + integer, codimension[*], save :: coarray_var !ERROR: Coarray 'coarray_var' not allowed in a LOCAL locality-spec do concurrent(i=1:5) local(coarray_var) end do diff --git a/flang/test/Semantics/resolve88.f90 b/flang/test/Semantics/resolve88.f90 index 3794e9b28a6d3..34eb192347d02 100644 --- a/flang/test/Semantics/resolve88.f90 +++ b/flang/test/Semantics/resolve88.f90 @@ -64,11 +64,11 @@ module m type testType type(coarrayType) :: goodField - !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'goodcoarrayfield') + !ERROR: Pointer 'pointerfield' may not have a coarray potential component '%goodcoarrayfield' type(coarrayType), pointer :: pointerField - !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'goodcoarrayfield') + !ERROR: Allocatable or array component 'allocatablefield' may not have a coarray ultimate component '%goodcoarrayfield' type(coarrayType), allocatable :: allocatableField - !ERROR: An array or coarray component may not be of a type with a coarray ultimate component (named 'goodcoarrayfield') + !ERROR: Allocatable or array component 'arrayfield' may not have a coarray ultimate component '%goodcoarrayfield' type(coarrayType), dimension(3) :: arrayField end type testType diff --git a/flang/test/Semantics/resolve94.f90 b/flang/test/Semantics/resolve94.f90 index 19c06ad0d1622..75755fb2b2038 100644 --- a/flang/test/Semantics/resolve94.f90 +++ b/flang/test/Semantics/resolve94.f90 @@ -6,6 +6,7 
@@ ! C931 A stat-variable in an image-selector shall not be a coindexed object. subroutine s1() use ISO_FORTRAN_ENV + save type(team_type) :: team1, team2 real :: rCoarray[10,20,*] real :: rVar1, rVar2 diff --git a/flang/test/Semantics/this_image01.f90 b/flang/test/Semantics/this_image01.f90 index fdcccdaeed0e3..eb25cd4e5a7ef 100644 --- a/flang/test/Semantics/this_image01.f90 +++ b/flang/test/Semantics/this_image01.f90 @@ -8,7 +8,7 @@ subroutine test type(team_type) :: coteam[*] integer :: coscalar[*], coarray(3)[*] save :: coteam, coscalar, coarray - real coarray1[*], coarray2[2,*], coarray3[2,3,*] + real, save :: coarray1[*], coarray2[2,*], coarray3[2,3,*] integer indices(3) ! correct calls, should produce no errors diff --git a/flang/test/Transforms/generic-loop-rewriting-todo.mlir b/flang/test/Transforms/generic-loop-rewriting-todo.mlir index e992296c9a837..64094d61eb9a3 100644 --- a/flang/test/Transforms/generic-loop-rewriting-todo.mlir +++ b/flang/test/Transforms/generic-loop-rewriting-todo.mlir @@ -1,24 +1,12 @@ // RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s - -omp.declare_reduction @add_reduction_i32 : i32 init { - ^bb0(%arg0: i32): - %c0_i32 = arith.constant 0 : i32 - omp.yield(%c0_i32 : i32) - } combiner { - ^bb0(%arg0: i32, %arg1: i32): - %0 = arith.addi %arg0, %arg1 : i32 - omp.yield(%0 : i32) - } - func.func @_QPloop_order() { omp.teams { %c0 = arith.constant 0 : i32 %c10 = arith.constant 10 : i32 %c1 = arith.constant 1 : i32 - %sum = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_orderEi"} - // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.loop operation}} - omp.loop reduction(@add_reduction_i32 %sum -> %arg2 : !fir.ref) { + // expected-error@below {{not yet implemented: Unhandled clause order in omp.loop operation}} + omp.loop order(reproducible:concurrent) { omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) { omp.yield } diff --git 
a/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir b/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir index bcf8b63075dbf..121ee553b51e2 100644 --- a/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir +++ b/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir @@ -28,7 +28,7 @@ func.func @_QQmain() { } // CHECK: %[[RECORD_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = "_QFEdst_record"} -// CHECK: %[[FIELD_COORD:.*]] = fir.coordinate_of %[[RECORD_DECL]]#1, %{{c1.*}} +// CHECK: %[[FIELD_COORD:.*]] = fir.coordinate_of %[[RECORD_DECL]]#1, to_implicitly_map // CHECK: %[[UPPER_BOUND:.*]] = arith.subi %{{.*}}#1, %{{c1.*}} : index diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 867bd1e5ee20f..41f6d3b67c95b 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -724,6 +724,19 @@ add_header_macro( .llvm-libc-macros.poll-macros ) +# UEFI spec references "Uefi.h" so we use that name for compatibility +add_header_macro( + uefi + ../libc/include/Uefi.yaml + Uefi.h.def + Uefi.h + DEPENDS + .llvm_libc_common_h + .llvm-libc-types.EFI_GUID + .llvm-libc-types.EFI_STATUS + .llvm-libc-types.EFI_SYSTEM_TABLE +) + if(NOT LLVM_LIBC_FULL_BUILD) # We don't install headers in non-fullbuild mode. return() diff --git a/libc/include/Uefi.h.def b/libc/include/Uefi.h.def new file mode 100644 index 0000000000000..6655e13579cd8 --- /dev/null +++ b/libc/include/Uefi.h.def @@ -0,0 +1,16 @@ +//===-- UEFI header uefi.h --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_UEFI_H +#define LLVM_LIBC_UEFI_H + +#include "__llvm-libc-common.h" + +%%public_api() + +#endif // LLVM_LIBC_UEFI_H diff --git a/libc/include/Uefi.yaml b/libc/include/Uefi.yaml new file mode 100644 index 0000000000000..28582eb2524b1 --- /dev/null +++ b/libc/include/Uefi.yaml @@ -0,0 +1,15 @@ +header: Uefi.h +standards: UEFI +macros: [] +types: + - type_name: EFI_BOOT_SERVICES + - type_name: EFI_GUID + - type_name: EFI_STATUS + - type_name: EFI_SYSTEM_TABLE +enums: [] +functions: [] +objects: + - object_name: efi_system_table + object_type: EFI_SYSTEM_TABLE * + - object_name: efi_image_handle + object_type: EFI_HANDLE diff --git a/libc/include/llvm-libc-macros/CMakeLists.txt b/libc/include/llvm-libc-macros/CMakeLists.txt index 8c1f7387f3b4d..7f10e773479a3 100644 --- a/libc/include/llvm-libc-macros/CMakeLists.txt +++ b/libc/include/llvm-libc-macros/CMakeLists.txt @@ -337,3 +337,9 @@ add_macro_header( HDR poll-macros.h ) + +add_macro_header( + EFIAPI_macros + HDR + EFIAPI-macros.h +) diff --git a/libc/include/llvm-libc-macros/EFIAPI-macros.h b/libc/include/llvm-libc-macros/EFIAPI-macros.h new file mode 100644 index 0000000000000..cb854928d0ab7 --- /dev/null +++ b/libc/include/llvm-libc-macros/EFIAPI-macros.h @@ -0,0 +1,18 @@ +//===-- Definition of EFIAPI macro ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_MACROS_EFIAPI_MACROS_H +#define LLVM_LIBC_MACROS_EFIAPI_MACROS_H + +#if defined(__x86_64__) && !defined(__ILP32__) +#define EFIAPI __attribute__((ms_abi)) +#else +#define EFIAPI +#endif + +#endif // LLVM_LIBC_MACROS_EFIAPI_MACROS_H diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index 7ed69ab1af6d9..58761ac97d7cf 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -157,3 +157,127 @@ DEPENDS add_header(locale_t HDR locale_t.h) add_header(struct_lconv HDR struct_lconv.h) add_header(stdfix-types HDR stdfix-types.h) + +# UEFI +add_header(EFI_GUID HDR EFI_GUID.h DEPENDS libc.include.llvm-libc-macros.stdint_macros) +add_header(EFI_CONFIGURATION_TABLE HDR EFI_CONFIGURATION_TABLE.h DEPENDS .EFI_GUID) + +add_header(EFI_PHYSICAL_ADDRESS HDR EFI_PHYSICAL_ADDRESS.h DEPENDS libc.include.llvm-libc-macros.stdint_macros) +add_header(EFI_VIRTUAL_ADDRESS HDR EFI_VIRTUAL_ADDRESS.h DEPENDS libc.include.llvm-libc-macros.stdint_macros) + +add_header(EFI_MEMORY_DESCRIPTOR + HDR + EFI_MEMORY_DESCRIPTOR.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros + .EFI_PHYSICAL_ADDRESS + .EFI_VIRTUAL_ADDRESS +) + +add_header(EFI_ALLOCATE_TYPE HDR EFI_ALLOCATE_TYPE.h) +add_header(EFI_EVENT HDR EFI_EVENT.h) +add_header(EFI_INTERFACE_TYPE HDR EFI_INTERFACE_TYPE.h) +add_header(EFI_LOCATE_SEARCH_TYPE HDR EFI_LOCATE_SEARCH_TYPE.h) +add_header(EFI_MEMORY_TYPE HDR EFI_MEMORY_TYPE.h) +add_header(EFI_HANDLE HDR EFI_HANDLE.h) +add_header(EFI_TIME HDR EFI_TIME.h DEPENDS libc.include.llvm-libc-macros.stdint_macros) +add_header(EFI_TIMER_DELAY HDR EFI_TIMER_DELAY.h) +add_header(EFI_TPL HDR EFI_TPL.h DEPENDS .size_t) +add_header(EFI_STATUS HDR EFI_STATUS.h DEPENDS .size_t) + +add_header(EFI_OPEN_PROTOCOL_INFORMATION_ENTRY + HDR + 
EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros + .EFI_HANDLE +) + +add_header(EFI_CAPSULE + HDR + EFI_CAPSULE.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros + .EFI_GUID +) + +add_header(EFI_TABLE_HEADER + HDR + EFI_TABLE_HEADER.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros +) + +add_header(EFI_DEVICE_PATH_PROTOCOL + HDR + EFI_DEVICE_PATH_PROTOCOL.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros +) + +add_header(EFI_SIMPLE_TEXT_INPUT_PROTOCOL + HDR + EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h + DEPENDS + libc.include.llvm-libc-macros.EFIAPI_macros + libc.include.llvm-libc-macros.stdint_macros + .EFI_EVENT + .EFI_STATUS + .char16_t +) + +add_header(EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL + HDR + EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros + .EFI_STATUS + .size_t +) + +# DEPENDS mirrors the #include list of EFI_BOOT_SERVICES.h. +add_header(EFI_BOOT_SERVICES + HDR + EFI_BOOT_SERVICES.h + DEPENDS + libc.include.llvm-libc-macros.EFIAPI_macros + .EFI_ALLOCATE_TYPE + .EFI_DEVICE_PATH_PROTOCOL + .EFI_EVENT + .EFI_GUID + .EFI_INTERFACE_TYPE + .EFI_LOCATE_SEARCH_TYPE + .EFI_MEMORY_DESCRIPTOR + .EFI_MEMORY_TYPE + .EFI_OPEN_PROTOCOL_INFORMATION_ENTRY + .EFI_PHYSICAL_ADDRESS + .EFI_STATUS + .EFI_TABLE_HEADER + .EFI_TIMER_DELAY + .EFI_TPL + .char16_t + .size_t +) + +# DEPENDS mirrors the #include list of EFI_RUNTIME_SERVICES.h. +add_header(EFI_RUNTIME_SERVICES + HDR + EFI_RUNTIME_SERVICES.h + DEPENDS + libc.include.llvm-libc-macros.EFIAPI_macros + libc.include.llvm-libc-macros.stdint_macros + .EFI_CAPSULE + .EFI_MEMORY_DESCRIPTOR + .EFI_PHYSICAL_ADDRESS + .EFI_STATUS + .EFI_TABLE_HEADER + .EFI_TIME + .char16_t + .size_t +) + +add_header(EFI_SYSTEM_TABLE + HDR + EFI_SYSTEM_TABLE.h + DEPENDS + .EFI_BOOT_SERVICES + .EFI_CONFIGURATION_TABLE + .EFI_HANDLE + .EFI_RUNTIME_SERVICES + .EFI_SIMPLE_TEXT_INPUT_PROTOCOL + .EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL + .EFI_STATUS + .EFI_TABLE_HEADER + .char16_t +) diff --git a/libc/include/llvm-libc-types/EFI_ALLOCATE_TYPE.h b/libc/include/llvm-libc-types/EFI_ALLOCATE_TYPE.h new file mode 100644 index 0000000000000..90f23969678f4 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_ALLOCATE_TYPE.h @@ -0,0 +1,19 @@ +//===-- Definition of 
EFI_ALLOCATE_TYPE type ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_ALLOCATE_TYPE_H +#define LLVM_LIBC_TYPES_EFI_ALLOCATE_TYPE_H + +typedef enum { + AllocateAnyPages, + AllocateMaxAddress, + AllocateAddress, + MaxAllocateType +} EFI_ALLOCATE_TYPE; + +#endif // LLVM_LIBC_TYPES_EFI_ALLOCATE_TYPE_H diff --git a/libc/include/llvm-libc-types/EFI_BOOT_SERVICES.h b/libc/include/llvm-libc-types/EFI_BOOT_SERVICES.h new file mode 100644 index 0000000000000..8b7a6aadd7a24 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_BOOT_SERVICES.h @@ -0,0 +1,250 @@ +//===-- Definition of EFI_BOOT_SERVICES type ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_BOOT_SERVICES_H +#define LLVM_LIBC_TYPES_EFI_BOOT_SERVICES_H + +#include "../llvm-libc-macros/EFIAPI-macros.h" +#include "EFI_ALLOCATE_TYPE.h" +#include "EFI_DEVICE_PATH_PROTOCOL.h" +#include "EFI_EVENT.h" +#include "EFI_GUID.h" +#include "EFI_INTERFACE_TYPE.h" +#include "EFI_LOCATE_SEARCH_TYPE.h" +#include "EFI_MEMORY_DESCRIPTOR.h" +#include "EFI_MEMORY_TYPE.h" +#include "EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h" +#include "EFI_PHYSICAL_ADDRESS.h" +#include "EFI_STATUS.h" +#include "EFI_TABLE_HEADER.h" +#include "EFI_TIMER_DELAY.h" +#include "EFI_TPL.h" +#include "char16_t.h" +#include "size_t.h" + +#define EFI_BOOT_SERVICES_SIGNATURE 0x56524553544f4f42 +#define EFI_BOOT_SERVICES_REVISION EFI_SPECIFICATION_VERSION + +typedef EFI_TPL(EFIAPI *EFI_RAISE_TPL)(EFI_TPL NewTpl); +typedef void(EFIAPI *EFI_RESTORE_TPL)(EFI_TPL OldTpl); + +typedef EFI_STATUS(EFIAPI *EFI_ALLOCATE_PAGES)(EFI_ALLOCATE_TYPE Type, + EFI_MEMORY_TYPE MemoryType, + size_t Pages, + EFI_PHYSICAL_ADDRESS *Memory); +typedef EFI_STATUS(EFIAPI *EFI_FREE_PAGES)(EFI_PHYSICAL_ADDRESS Memory, + size_t Pages); +typedef EFI_STATUS(EFIAPI *EFI_GET_MEMORY_MAP)(size_t *MemoryMapSize, + EFI_MEMORY_DESCRIPTOR *MemoryMap, + size_t *MapKey, + size_t *DescriptorSize, + uint32_t *DescriptorVersion); + +typedef EFI_STATUS(EFIAPI *EFI_ALLOCATE_POOL)(EFI_MEMORY_TYPE PoolType, + size_t Size, void **Buffer); +typedef EFI_STATUS(EFIAPI *EFI_FREE_POOL)(void *Buffer); + +typedef void(EFIAPI *EFI_EVENT_NOTIFY)(EFI_EVENT Event, void *Context); + +typedef EFI_STATUS(EFIAPI *EFI_CREATE_EVENT)(uint32_t Type, EFI_TPL NotifyTpl, + EFI_EVENT_NOTIFY NotifyFunction, + void *NotifyContext, + EFI_EVENT *Event); +typedef EFI_STATUS(EFIAPI *EFI_SET_TIMER)(EFI_EVENT Event, EFI_TIMER_DELAY Type, + uint64_t TriggerTime); +typedef EFI_STATUS(EFIAPI 
*EFI_WAIT_FOR_EVENT)(size_t NumberOfEvents, + EFI_EVENT *Event, size_t *Index); +typedef EFI_STATUS(EFIAPI *EFI_SIGNAL_EVENT)(EFI_EVENT Event); +typedef EFI_STATUS(EFIAPI *EFI_CLOSE_EVENT)(EFI_EVENT Event); +typedef EFI_STATUS(EFIAPI *EFI_CHECK_EVENT)(EFI_EVENT Event); + +typedef EFI_STATUS(EFIAPI *EFI_INSTALL_PROTOCOL_INTERFACE)( + EFI_HANDLE *Handle, EFI_GUID *Protocol, EFI_INTERFACE_TYPE InterfaceType, + void *Interface); +typedef EFI_STATUS(EFIAPI *EFI_REINSTALL_PROTOCOL_INTERFACE)( + EFI_HANDLE Handle, EFI_GUID *Protocol, void *OldInterface, + void *NewInterface); +typedef EFI_STATUS(EFIAPI *EFI_UNINSTALL_PROTOCOL_INTERFACE)(EFI_HANDLE Handle, + EFI_GUID *Protocol, + void *Interface); + +typedef EFI_STATUS(EFIAPI *EFI_HANDLE_PROTOCOL)(EFI_HANDLE Handle, + EFI_GUID *Protocol, + void **Interface); +typedef EFI_STATUS(EFIAPI *EFI_REGISTER_PROTOCOL_NOTIFY)(EFI_GUID *Protocol, + EFI_EVENT Event, + void **Registration); + +typedef EFI_STATUS(EFIAPI *EFI_LOCATE_HANDLE)(EFI_LOCATE_SEARCH_TYPE SearchType, + EFI_GUID *Protocol, + void *SearchKey, + size_t *BufferSize, + EFI_HANDLE *Buffer); +typedef EFI_STATUS(EFIAPI *EFI_LOCATE_DEVICE_PATH)( + EFI_GUID *Protocol, EFI_DEVICE_PATH_PROTOCOL **DevicePath, + EFI_HANDLE *Device); + +typedef EFI_STATUS(EFIAPI *EFI_INSTALL_CONFIGURATION_TABLE)(EFI_GUID *Guid, + void *Table); +typedef EFI_STATUS(EFIAPI *EFI_IMAGE_UNLOAD)(EFI_HANDLE ImageHandle); +typedef EFI_STATUS(EFIAPI *EFI_IMAGE_START)(EFI_HANDLE ImageHandle, + size_t *ExitDataSize, + char16_t **ExitData); + +typedef EFI_STATUS(EFIAPI *EFI_EXIT)(EFI_HANDLE ImageHandle, + EFI_STATUS ExitStatus, size_t ExitDataSize, + char16_t *ExitData); +typedef EFI_STATUS(EFIAPI *EFI_EXIT_BOOT_SERVICES)(EFI_HANDLE ImageHandle, + size_t MapKey); +typedef EFI_STATUS(EFIAPI *EFI_GET_NEXT_MONOTONIC_COUNT)(uint64_t *Count); +typedef EFI_STATUS(EFIAPI *EFI_STALL)(size_t Microseconds); +typedef EFI_STATUS(EFIAPI *EFI_SET_WATCHDOG_TIMER)(size_t Timeout, + uint64_t WatchdogCode, + size_t DataSize, 
+ char16_t *WatchdogData); + +typedef EFI_STATUS(EFIAPI *EFI_CONNECT_CONTROLLER)( + EFI_HANDLE ControllerHandle, EFI_HANDLE *DriverImageHandle, + EFI_DEVICE_PATH_PROTOCOL *RemainingDevicePath, bool Recursive); + +typedef EFI_STATUS(EFIAPI *EFI_DISCONNECT_CONTROLLER)( + EFI_HANDLE ControllerHandle, EFI_HANDLE DriverImageHandle, + EFI_HANDLE ChildHandle); + +typedef EFI_STATUS(EFIAPI *EFI_OPEN_PROTOCOL)( + EFI_HANDLE Handle, EFI_GUID *Protocol, void **Interface, + EFI_HANDLE AgentHandle, EFI_HANDLE ControllerHandle, uint32_t Attributes); + +typedef EFI_STATUS(EFIAPI *EFI_CLOSE_PROTOCOL)(EFI_HANDLE Handle, + EFI_GUID *Protocol, + EFI_HANDLE AgentHandle, + EFI_HANDLE ControllerHandle); + +typedef EFI_STATUS(EFIAPI *EFI_OPEN_PROTOCOL_INFORMATION)( + EFI_HANDLE Handle, EFI_GUID *Protocol, + EFI_OPEN_PROTOCOL_INFORMATION_ENTRY **EntryBuffer, size_t *EntryCount); + +typedef EFI_STATUS(EFIAPI *EFI_PROTOCOLS_PER_HANDLE)( + EFI_HANDLE Handle, EFI_GUID ***ProtocolBuffer, size_t *ProtocolBufferCount); + +typedef EFI_STATUS(EFIAPI *EFI_LOCATE_HANDLE_BUFFER)( + EFI_LOCATE_SEARCH_TYPE SearchType, EFI_GUID *Protocol, void *SearchKey, + size_t *NoHandles, EFI_HANDLE **Buffer); + +typedef EFI_STATUS(EFIAPI *EFI_LOCATE_PROTOCOL)(EFI_GUID *Protocol, + void *Registration, + void **Interface); + +// The install variant takes a pointer to the handle, unlike the uninstall +// variant which takes the handle by value, so each needs its own type. +typedef EFI_STATUS(EFIAPI *EFI_INSTALL_MULTIPLE_PROTOCOL_INTERFACES)( + EFI_HANDLE *Handle, ...); +typedef EFI_STATUS(EFIAPI *EFI_UNINSTALL_MULTIPLE_PROTOCOL_INTERFACES)( + EFI_HANDLE Handle, ...); +typedef EFI_STATUS(EFIAPI *EFI_CALCULATE_CRC32)(void *Data, size_t DataSize, + uint32_t *Crc32); + +typedef void(EFIAPI *EFI_COPY_MEM)(void *Destination, void *Source, + size_t Length); +typedef void(EFIAPI *EFI_SET_MEM)(void *Buffer, size_t Size, uint8_t Value); + +typedef EFI_STATUS(EFIAPI *EFI_CREATE_EVENT_EX)( + uint32_t Type, EFI_TPL NotifyTpl, EFI_EVENT_NOTIFY NotifyFunction, + const void *NotifyContext, const EFI_GUID *EventGroup, EFI_EVENT *Event); + +// LoadImage has a different parameter list than UnloadImage, so it needs its +// own function-pointer type instead of reusing EFI_IMAGE_UNLOAD. +typedef EFI_STATUS(EFIAPI *EFI_IMAGE_LOAD)( + bool BootPolicy, EFI_HANDLE ParentImageHandle, + EFI_DEVICE_PATH_PROTOCOL *DevicePath, void *SourceBuffer, size_t SourceSize, + EFI_HANDLE *ImageHandle); + +// Boot services function-pointer table. Members are grouped by service family; +// the trailing comment on each member is its (U)EFI revision tag. +typedef struct { + EFI_TABLE_HEADER Hdr; + + // + // Task Priority Services + // + EFI_RAISE_TPL RaiseTPL; // EFI 1.0+ + EFI_RESTORE_TPL 
RestoreTPL; // EFI 1.0+ + + // + // Memory Services + // + EFI_ALLOCATE_PAGES AllocatePages; // EFI 1.0+ + EFI_FREE_PAGES FreePages; // EFI 1.0+ + EFI_GET_MEMORY_MAP GetMemoryMap; // EFI 1.0+ + EFI_ALLOCATE_POOL AllocatePool; // EFI 1.0+ + EFI_FREE_POOL FreePool; // EFI 1.0+ + + // + // Event & Timer Services + // + EFI_CREATE_EVENT CreateEvent; // EFI 1.0+ + EFI_SET_TIMER SetTimer; // EFI 1.0+ + EFI_WAIT_FOR_EVENT WaitForEvent; // EFI 1.0+ + EFI_SIGNAL_EVENT SignalEvent; // EFI 1.0+ + EFI_CLOSE_EVENT CloseEvent; // EFI 1.0+ + EFI_CHECK_EVENT CheckEvent; // EFI 1.0+ + + // + // Protocol Handler Services + // + EFI_INSTALL_PROTOCOL_INTERFACE InstallProtocolInterface; // EFI 1.0+ + EFI_REINSTALL_PROTOCOL_INTERFACE ReinstallProtocolInterface; // EFI 1.0+ + EFI_UNINSTALL_PROTOCOL_INTERFACE UninstallProtocolInterface; // EFI 1.0+ + EFI_HANDLE_PROTOCOL HandleProtocol; // EFI 1.0+ + void *Reserved; // EFI 1.0+ + EFI_REGISTER_PROTOCOL_NOTIFY RegisterProtocolNotify; // EFI 1.0+ + EFI_LOCATE_HANDLE LocateHandle; // EFI 1.0+ + EFI_LOCATE_DEVICE_PATH LocateDevicePath; // EFI 1.0+ + EFI_INSTALL_CONFIGURATION_TABLE InstallConfigurationTable; // EFI 1.0+ + + // + // Image Services + // + EFI_IMAGE_LOAD LoadImage; // EFI 1.0+ + EFI_IMAGE_START StartImage; // EFI 1.0+ + EFI_EXIT Exit; // EFI 1.0+ + EFI_IMAGE_UNLOAD UnloadImage; // EFI 1.0+ + EFI_EXIT_BOOT_SERVICES ExitBootServices; // EFI 1.0+ + + // + // Miscellaneous Services + // + EFI_GET_NEXT_MONOTONIC_COUNT GetNextMonotonicCount; // EFI 1.0+ + EFI_STALL Stall; // EFI 1.0+ + EFI_SET_WATCHDOG_TIMER SetWatchdogTimer; // EFI 1.0+ + + // + // DriverSupport Services + // + EFI_CONNECT_CONTROLLER ConnectController; // EFI 1.1 + EFI_DISCONNECT_CONTROLLER DisconnectController; // EFI 1.1+ + + // + // Open and Close Protocol Services + // + EFI_OPEN_PROTOCOL OpenProtocol; // EFI 1.1+ + EFI_CLOSE_PROTOCOL CloseProtocol; // EFI 1.1+ + EFI_OPEN_PROTOCOL_INFORMATION OpenProtocolInformation; // EFI 1.1+ + + // + // Library Services + // + 
EFI_PROTOCOLS_PER_HANDLE ProtocolsPerHandle; // EFI 1.1+ + EFI_LOCATE_HANDLE_BUFFER LocateHandleBuffer; // EFI 1.1+ + EFI_LOCATE_PROTOCOL LocateProtocol; // EFI 1.1+ + EFI_INSTALL_MULTIPLE_PROTOCOL_INTERFACES + InstallMultipleProtocolInterfaces; // EFI 1.1+ + EFI_UNINSTALL_MULTIPLE_PROTOCOL_INTERFACES + UninstallMultipleProtocolInterfaces; // EFI 1.1+* + + // + // 32-bit CRC Services + // + EFI_CALCULATE_CRC32 CalculateCrc32; // EFI 1.1+ + + // + // Miscellaneous Services + // + EFI_COPY_MEM CopyMem; // EFI 1.1+ + EFI_SET_MEM SetMem; // EFI 1.1+ + EFI_CREATE_EVENT_EX CreateEventEx; // UEFI 2.0+ +} EFI_BOOT_SERVICES; + +#endif // LLVM_LIBC_TYPES_EFI_BOOT_SERVICES_H diff --git a/libc/include/llvm-libc-types/EFI_CAPSULE.h b/libc/include/llvm-libc-types/EFI_CAPSULE.h new file mode 100644 index 0000000000000..c7440c9b03b75 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_CAPSULE.h @@ -0,0 +1,26 @@ +//===-- Definition of EFI_CAPSULE type ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_CAPSULE_H +#define LLVM_LIBC_TYPES_EFI_CAPSULE_H + +#include "../llvm-libc-macros/stdint-macros.h" +#include "EFI_GUID.h" + +typedef struct { + EFI_GUID CapsuleGuid; + uint32_t HeaderSize; + uint32_t Flags; + uint32_t CapsuleImageSize; +} EFI_CAPSULE_HEADER; + +#define CAPSULE_FLAGS_PERSIST_ACROSS_RESET 0x00010000 +#define CAPSULE_FLAGS_POPULATE_SYSTEM_TABLE 0x00020000 +#define CAPSULE_FLAGS_INITIATE_RESET 0x00040000 + +#endif // LLVM_LIBC_TYPES_EFI_CAPSULE_H diff --git a/libc/include/llvm-libc-types/EFI_CONFIGURATION_TABLE.h b/libc/include/llvm-libc-types/EFI_CONFIGURATION_TABLE.h new file mode 100644 index 0000000000000..56cd3e4fbb587 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_CONFIGURATION_TABLE.h @@ -0,0 +1,19 @@ +//===-- Definition of EFI_CONFIGURATION_TABLE type ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_CONFIGURATION_TABLE_H +#define LLVM_LIBC_TYPES_EFI_CONFIGURATION_TABLE_H + +#include "EFI_GUID.h" + +typedef struct { + EFI_GUID VendorGuid; + void *VendorTable; +} EFI_CONFIGURATION_TABLE; + +#endif // LLVM_LIBC_TYPES_EFI_CONFIGURATION_TABLE_H diff --git a/libc/include/llvm-libc-types/EFI_DEVICE_PATH_PROTOCOL.h b/libc/include/llvm-libc-types/EFI_DEVICE_PATH_PROTOCOL.h new file mode 100644 index 0000000000000..f6a0b2e1f45c0 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_DEVICE_PATH_PROTOCOL.h @@ -0,0 +1,23 @@ +//===-- Definition of EFI_DEVICE_PATH_PROTOCOL type -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_DEVICE_PATH_PROTOCOL_H +#define LLVM_LIBC_TYPES_EFI_DEVICE_PATH_PROTOCOL_H + +#include "../llvm-libc-macros/stdint-macros.h" + +#define EFI_DEVICE_PATH_PROTOCOL_GUID \ + {0x09576e91, 0x6d3f, 0x11d2, {0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b}} + +typedef struct _EFI_DEVICE_PATH_PROTOCOL { + uint8_t Type; + uint8_t SubType; + uint8_t Length[2]; +} EFI_DEVICE_PATH_PROTOCOL; + +#endif // LLVM_LIBC_TYPES_EFI_DEVICE_PATH_PROTOCOL_H diff --git a/libc/include/llvm-libc-types/EFI_EVENT.h b/libc/include/llvm-libc-types/EFI_EVENT.h new file mode 100644 index 0000000000000..938856b8e791e --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_EVENT.h @@ -0,0 +1,21 @@ +//===-- Definition of EFI_EVENT type --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_EVENT_H +#define LLVM_LIBC_TYPES_EFI_EVENT_H + +typedef void *EFI_EVENT; + +#define EVT_TIMER 0x80000000 +#define EVT_RUNTIME 0x40000000 +#define EVT_NOTIFY_WAIT 0x00000100 +#define EVT_NOTIFY_SIGNAL 0x00000200 +#define EVT_SIGNAL_EXIT_BOOT_SERVICES 0x00000201 +#define EVT_SIGNAL_VIRTUAL_ADDRESS_CHANGE 0x60000202 + +#endif // LLVM_LIBC_TYPES_EFI_EVENT_H diff --git a/libc/include/llvm-libc-types/EFI_GUID.h b/libc/include/llvm-libc-types/EFI_GUID.h new file mode 100644 index 0000000000000..b3530008384dd --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_GUID.h @@ -0,0 +1,21 @@ +//===-- Definition of EFI_GUID type -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_GUID_H +#define LLVM_LIBC_TYPES_EFI_GUID_H + +#include "../llvm-libc-macros/stdint-macros.h" + +typedef struct { + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + uint8_t Data4[8]; +} EFI_GUID; + +#endif // LLVM_LIBC_TYPES_EFI_GUID_H diff --git a/libc/include/llvm-libc-types/EFI_HANDLE.h b/libc/include/llvm-libc-types/EFI_HANDLE.h new file mode 100644 index 0000000000000..d4376dd247533 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_HANDLE.h @@ -0,0 +1,14 @@ +//===-- Definition of EFI_HANDLE type ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_HANDLE_H +#define LLVM_LIBC_TYPES_EFI_HANDLE_H + +typedef void *EFI_HANDLE; + +#endif // LLVM_LIBC_TYPES_EFI_HANDLE_H diff --git a/libc/include/llvm-libc-types/EFI_INTERFACE_TYPE.h b/libc/include/llvm-libc-types/EFI_INTERFACE_TYPE.h new file mode 100644 index 0000000000000..d463c5381b3f0 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_INTERFACE_TYPE.h @@ -0,0 +1,16 @@ +//===-- Definition of EFI_INTERFACE_TYPE type -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_INTERFACE_TYPE_H +#define LLVM_LIBC_TYPES_EFI_INTERFACE_TYPE_H + +typedef enum { + EFI_NATIVE_INTERFACE, +} EFI_INTERFACE_TYPE; + +#endif // LLVM_LIBC_TYPES_EFI_INTERFACE_TYPE_H diff --git a/libc/include/llvm-libc-types/EFI_LOCATE_SEARCH_TYPE.h b/libc/include/llvm-libc-types/EFI_LOCATE_SEARCH_TYPE.h new file mode 100644 index 0000000000000..3a8fd7bc3e776 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_LOCATE_SEARCH_TYPE.h @@ -0,0 +1,18 @@ +//===-- Definition of EFI_LOCATE_SEARCH_TYPE type -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_LOCATE_SEARCH_TYPE_H +#define LLVM_LIBC_TYPES_EFI_LOCATE_SEARCH_TYPE_H + +typedef enum { + AllHandles, + ByRegisterNotify, + ByProtocol, +} EFI_LOCATE_SEARCH_TYPE; + +#endif // LLVM_LIBC_TYPES_EFI_LOCATE_SEARCH_TYPE_H diff --git a/libc/include/llvm-libc-types/EFI_MEMORY_DESCRIPTOR.h b/libc/include/llvm-libc-types/EFI_MEMORY_DESCRIPTOR.h new file mode 100644 index 0000000000000..72d0579aef76c --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_MEMORY_DESCRIPTOR.h @@ -0,0 +1,43 @@ +//===-- Definition of EFI_MEMORY_DESCRIPTOR type --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_MEMORY_DESCRIPTOR_H +#define LLVM_LIBC_TYPES_EFI_MEMORY_DESCRIPTOR_H + +#include "../llvm-libc-macros/stdint-macros.h" +#include "EFI_PHYSICAL_ADDRESS.h" +#include "EFI_VIRTUAL_ADDRESS.h" + +#define EFI_MEMORY_DESCRIPTOR_VERSION 1 + +#define EFI_MEMORY_UC 0x0000000000000001 +#define EFI_MEMORY_WC 0x0000000000000002 +#define EFI_MEMORY_WT 0x0000000000000004 +#define EFI_MEMORY_WB 0x0000000000000008 +#define EFI_MEMORY_UCE 0x0000000000000010 +#define EFI_MEMORY_WP 0x0000000000001000 +#define EFI_MEMORY_RP 0x0000000000002000 +#define EFI_MEMORY_XP 0x0000000000004000 +#define EFI_MEMORY_NV 0x0000000000008000 +#define EFI_MEMORY_MORE_RELIABLE 0x0000000000010000 +#define EFI_MEMORY_RO 0x0000000000020000 +#define EFI_MEMORY_SP 0x0000000000040000 +#define EFI_MEMORY_CPU_CRYPTO 0x0000000000080000 +#define EFI_MEMORY_RUNTIME 0x8000000000000000 +#define EFI_MEMORY_ISA_VALID 0x4000000000000000 +#define EFI_MEMORY_ISA_MASK 
0x0FFFF00000000000 + +typedef struct { + uint32_t Type; + EFI_PHYSICAL_ADDRESS PhysicalStart; + EFI_VIRTUAL_ADDRESS VirtualStart; + uint64_t NumberOfPages; + uint64_t Attribute; +} EFI_MEMORY_DESCRIPTOR; + +#endif // LLVM_LIBC_TYPES_EFI_MEMORY_DESCRIPTOR_H diff --git a/libc/include/llvm-libc-types/EFI_MEMORY_TYPE.h b/libc/include/llvm-libc-types/EFI_MEMORY_TYPE.h new file mode 100644 index 0000000000000..c8921cda2c388 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_MEMORY_TYPE.h @@ -0,0 +1,32 @@ +//===-- Definition of EFI_MEMORY_TYPE type --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_MEMORY_TYPE_H +#define LLVM_LIBC_TYPES_EFI_MEMORY_TYPE_H + +typedef enum { + EfiReservedMemoryType, + EfiLoaderCode, + EfiLoaderData, + EfiBootServicesCode, + EfiBootServicesData, + EfiRuntimeServicesCode, + EfiRuntimeServicesData, + EfiConventionalMemory, + EfiUnusableMemory, + EfiACPIReclaimMemory, + EfiACPIMemoryNVS, + EfiMemoryMappedIO, + EfiMemoryMappedIOPortSpace, + EfiPalCode, + EfiPersistentMemory, + EfiUnacceptedMemoryType, + EfiMaxMemoryType +} EFI_MEMORY_TYPE; + +#endif // LLVM_LIBC_TYPES_EFI_MEMORY_TYPE_H diff --git a/libc/include/llvm-libc-types/EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h b/libc/include/llvm-libc-types/EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h new file mode 100644 index 0000000000000..de0c59c139efb --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h @@ -0,0 +1,22 @@ +//===-- Definition of EFI_OPEN_PROTOCOL_INFORMATION_ENTRY type ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_OPEN_PROTOCOL_INFORMATION_ENTRY_H
+#define LLVM_LIBC_TYPES_EFI_OPEN_PROTOCOL_INFORMATION_ENTRY_H
+
+#include "../llvm-libc-macros/stdint-macros.h"
+#include "EFI_HANDLE.h"
+
+typedef struct {
+  EFI_HANDLE AgentHandle;
+  EFI_HANDLE ControllerHandle;
+  uint32_t Attributes;
+  uint32_t OpenCount;
+} EFI_OPEN_PROTOCOL_INFORMATION_ENTRY;
+
+#endif // LLVM_LIBC_TYPES_EFI_OPEN_PROTOCOL_INFORMATION_ENTRY_H
diff --git a/libc/include/llvm-libc-types/EFI_PHYSICAL_ADDRESS.h b/libc/include/llvm-libc-types/EFI_PHYSICAL_ADDRESS.h
new file mode 100644
index 0000000000000..8880ee66c0f8d
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_PHYSICAL_ADDRESS.h
@@ -0,0 +1,16 @@
+//===-- Definition of EFI_PHYSICAL_ADDRESS type ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_PHYSICAL_ADDRESS_H
+#define LLVM_LIBC_TYPES_EFI_PHYSICAL_ADDRESS_H
+
+#include "../llvm-libc-macros/stdint-macros.h"
+
+typedef uint64_t EFI_PHYSICAL_ADDRESS;
+
+#endif // LLVM_LIBC_TYPES_EFI_PHYSICAL_ADDRESS_H
diff --git a/libc/include/llvm-libc-types/EFI_RUNTIME_SERVICES.h b/libc/include/llvm-libc-types/EFI_RUNTIME_SERVICES.h
new file mode 100644
index 0000000000000..8913118b0844c
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_RUNTIME_SERVICES.h
@@ -0,0 +1,142 @@
+//===-- Definition of EFI_RUNTIME_SERVICES type ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_RUNTIME_SERVICES_H
+#define LLVM_LIBC_TYPES_EFI_RUNTIME_SERVICES_H
+
+#include "../llvm-libc-macros/EFIAPI-macros.h"
+#include "../llvm-libc-macros/stdint-macros.h"
+#include "EFI_CAPSULE.h"
+#include "EFI_MEMORY_DESCRIPTOR.h"
+#include "EFI_PHYSICAL_ADDRESS.h"
+#include "EFI_STATUS.h"
+#include "EFI_TABLE_HEADER.h"
+#include "EFI_TIME.h"
+#include "char16_t.h"
+#include "size_t.h"
+
+#define EFI_RUNTIME_SERVICES_SIGNATURE 0x56524553544e5552
+#define EFI_RUNTIME_SERVICES_REVISION EFI_SPECIFICATION_VERSION
+
+#define EFI_VARIABLE_NON_VOLATILE 0x00000001
+#define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002
+#define EFI_VARIABLE_RUNTIME_ACCESS 0x00000004
+#define EFI_VARIABLE_HARDWARE_ERROR_RECORD 0x00000008
+// EFI_VARIABLE_HARDWARE_ERROR_RECORD (above) is identified by the mnemonic 'HR'
+// elsewhere in the UEFI specification.
+#define EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS 0x00000010
+// NOTE: EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS is deprecated
+// and should be considered reserved.
+#define EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS 0x00000020
+#define EFI_VARIABLE_APPEND_WRITE 0x00000040
+#define EFI_VARIABLE_ENHANCED_AUTHENTICATED_ACCESS 0x00000080
+
+typedef enum {
+  EfiResetCold,
+  EfiResetWarm,
+  EfiResetShutdown,
+  EfiResetPlatformSpecific,
+} EFI_RESET_TYPE;
+
+#define EFI_VARIABLE_AUTHENTICATION_3_CERT_ID_SHA256 1
+
+typedef struct {
+  uint8_t Type;
+  uint32_t IdSize;
+  // Value is defined as:
+  // uint8_t Id[IdSize];
+} EFI_VARIABLE_AUTHENTICATION_3_CERT_ID;
+
+typedef EFI_STATUS(EFIAPI *EFI_GET_TIME)(EFI_TIME *Time,
+                                         EFI_TIME_CAPABILITIES *Capabilities);
+typedef EFI_STATUS(EFIAPI *EFI_SET_TIME)(EFI_TIME *Time);
+typedef EFI_STATUS(EFIAPI *EFI_GET_WAKEUP_TIME)(bool *Enabled, bool *Pending,
+                                                EFI_TIME *Time);
+// Enable is passed by value (UEFI: IN BOOLEAN Enable). Per the UEFI spec, Time
+// may be null when Enable is false.
+typedef EFI_STATUS(EFIAPI *EFI_SET_WAKEUP_TIME)(bool Enable, EFI_TIME *Time);
+
+typedef EFI_STATUS(EFIAPI *EFI_SET_VIRTUAL_ADDRESS_MAP)(
+    size_t MemoryMapSize, size_t DescriptorSize, uint32_t DescriptorVersion,
+    EFI_MEMORY_DESCRIPTOR *VirtualMap);
+typedef EFI_STATUS(EFIAPI *EFI_CONVERT_POINTER)(size_t DebugDisposition,
+                                                void **Address);
+
+typedef EFI_STATUS(EFIAPI *EFI_GET_VARIABLE)(char16_t *VariableName,
+                                             EFI_GUID *VendorGuid,
+                                             uint32_t *Attributes,
+                                             size_t *DataSize, void *Data);
+typedef EFI_STATUS(EFIAPI *EFI_GET_NEXT_VARIABLE_NAME)(size_t *VariableNameSize,
+                                                       char16_t *VariableName,
+                                                       EFI_GUID *VendorGuid);
+typedef EFI_STATUS(EFIAPI *EFI_SET_VARIABLE)(char16_t *VariableName,
+                                             EFI_GUID *VendorGuid,
+                                             uint32_t Attributes,
+                                             size_t DataSize, void *Data);
+
+typedef EFI_STATUS(EFIAPI *EFI_GET_NEXT_HIGH_MONO_COUNT)(uint32_t *HighCount);
+typedef void(EFIAPI *EFI_RESET_SYSTEM)(EFI_RESET_TYPE ResetType,
+                                       EFI_STATUS ResetStatus, size_t DataSize,
+                                       void *ResetData);
+
+typedef EFI_STATUS(EFIAPI *EFI_UPDATE_CAPSULE)(
+    EFI_CAPSULE_HEADER **CapsuleHeaderArray, size_t CapsuleCount,
+    EFI_PHYSICAL_ADDRESS ScatterGatherList);
+// MaximumCapsuleSize and ResetType are outputs (UEFI: OUT), hence pointers.
+typedef EFI_STATUS(EFIAPI *EFI_QUERY_CAPSULE_CAPABILITIES)(
+    EFI_CAPSULE_HEADER **CapsuleHeaderArray, size_t CapsuleCount,
+    uint64_t *MaximumCapsuleSize, EFI_RESET_TYPE *ResetType);
+
+typedef EFI_STATUS(EFIAPI *EFI_QUERY_VARIABLE_INFO)(
+    uint32_t Attributes, uint64_t *MaximumVariableStorageSize,
+    uint64_t *RemainingVariableStorageSize, uint64_t *MaximumVariableSize);
+
+// Runtime services table: EFI_TABLE_HEADER, then the service entry points.
+typedef struct {
+  EFI_TABLE_HEADER Hdr;
+
+  //
+  // Time Services
+  //
+  EFI_GET_TIME GetTime;
+  EFI_SET_TIME SetTime;
+  EFI_GET_WAKEUP_TIME GetWakeupTime;
+  EFI_SET_WAKEUP_TIME SetWakeupTime;
+
+  //
+  // Virtual Memory Services
+  //
+  EFI_SET_VIRTUAL_ADDRESS_MAP SetVirtualAddressMap;
+  EFI_CONVERT_POINTER ConvertPointer;
+
+  //
+  // Variable Services
+  //
+  EFI_GET_VARIABLE GetVariable;
+  EFI_GET_NEXT_VARIABLE_NAME GetNextVariableName;
+  EFI_SET_VARIABLE SetVariable;
+
+  //
+  // Miscellaneous Services
+  //
+  EFI_GET_NEXT_HIGH_MONO_COUNT GetNextHighMonotonicCount;
+  EFI_RESET_SYSTEM ResetSystem;
+
+  //
+  // UEFI 2.0 Capsule Services
+  //
+  EFI_UPDATE_CAPSULE UpdateCapsule;
+  EFI_QUERY_CAPSULE_CAPABILITIES QueryCapsuleCapabilities;
+
+  //
+  // Miscellaneous UEFI 2.0 Service
+  //
+  EFI_QUERY_VARIABLE_INFO QueryVariableInfo;
+} EFI_RUNTIME_SERVICES;
+
+#endif // LLVM_LIBC_TYPES_EFI_RUNTIME_SERVICES_H
diff --git a/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h b/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h
new file mode 100644
index 0000000000000..a6dc0952b6310
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h
@@ -0,0 +1,39 @@
+//===-- Definition of EFI_SIMPLE_TEXT_INPUT_PROTOCOL type -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_INPUT_PROTOCOL_H
+#define LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_INPUT_PROTOCOL_H
+
+#include "../llvm-libc-macros/EFIAPI-macros.h"
+#include "../llvm-libc-macros/stdint-macros.h"
+#include "EFI_EVENT.h"
+#include "EFI_STATUS.h"
+#include "char16_t.h"
+
+#define EFI_SIMPLE_TEXT_INPUT_PROTOCOL_GUID                                    \
+  {0x387477c1, 0x69c7, 0x11d2, {0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b}}
+
+typedef struct {
+  uint16_t ScanCode;
+  char16_t UnicodeChar;
+} EFI_INPUT_KEY;
+
+struct _EFI_SIMPLE_TEXT_INPUT_PROTOCOL;
+
+typedef EFI_STATUS(EFIAPI *EFI_INPUT_RESET)(
+    struct _EFI_SIMPLE_TEXT_INPUT_PROTOCOL *This, bool ExtendedVerification);
+typedef EFI_STATUS(EFIAPI *EFI_INPUT_READ_KEY)(
+    struct _EFI_SIMPLE_TEXT_INPUT_PROTOCOL *This, EFI_INPUT_KEY *Key);
+
+typedef struct _EFI_SIMPLE_TEXT_INPUT_PROTOCOL {
+  EFI_INPUT_RESET Reset;
+  EFI_INPUT_READ_KEY ReadKeyStroke;
+  EFI_EVENT WaitForKey;
+} EFI_SIMPLE_TEXT_INPUT_PROTOCOL;
+
+#endif // LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_INPUT_PROTOCOL_H
diff --git a/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h b/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h
new file mode 100644
index 0000000000000..b5014c46a0722
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h
@@ -0,0 +1,68 @@
+//===-- Definition of EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL type ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL_H
+#define LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL_H
+
+// EFIAPI and char16_t are used by the typedefs below; include them directly
+// rather than relying on the includer's include order.
+#include "../llvm-libc-macros/EFIAPI-macros.h"
+#include "../llvm-libc-macros/stdint-macros.h"
+#include "EFI_STATUS.h"
+#include "char16_t.h"
+#include "size_t.h"
+
+#define EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL_GUID                                   \
+  {0x387477c2, 0x69c7, 0x11d2, {0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b}}
+
+struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL;
+
+typedef EFI_STATUS(EFIAPI *EFI_TEXT_RESET)(
+    struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, bool ExtendedVerification);
+typedef EFI_STATUS(EFIAPI *EFI_TEXT_STRING)(
+    struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, const char16_t *String);
+typedef EFI_STATUS(EFIAPI *EFI_TEXT_TEST_STRING)(
+    struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, const char16_t *String);
+typedef EFI_STATUS(EFIAPI *EFI_TEXT_QUERY_MODE)(
+    struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, size_t ModeNumber,
+    size_t *Columns, size_t *Rows);
+
+typedef EFI_STATUS(EFIAPI *EFI_TEXT_SET_MODE)(
+    struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, size_t ModeNumber);
+typedef EFI_STATUS(EFIAPI *EFI_TEXT_SET_ATTRIBUTE)(
+    struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, size_t Attribute);
+typedef EFI_STATUS(EFIAPI *EFI_TEXT_CLEAR_SCREEN)(
+    struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This);
+typedef EFI_STATUS(EFIAPI *EFI_TEXT_SET_CURSOR_POSITION)(
+    struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, size_t Column, size_t Row);
+typedef EFI_STATUS(EFIAPI *EFI_TEXT_ENABLE_CURSOR)(
+    struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, bool Visible);
+
+typedef struct { // Reached through EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL::Mode below.
+  int32_t MaxMode;
+  int32_t Mode;
+  int32_t Attribute;
+  int32_t CursorColumn;
+  int32_t CursorRow;
+  bool CursorVisible;
+} SIMPLE_TEXT_OUTPUT_MODE;
+
+typedef struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL {
+  EFI_TEXT_RESET Reset;
+  EFI_TEXT_STRING OutputString;
+  EFI_TEXT_TEST_STRING TestString;
+  EFI_TEXT_QUERY_MODE QueryMode;
+  EFI_TEXT_SET_MODE SetMode;
+  EFI_TEXT_SET_ATTRIBUTE SetAttribute;
+  EFI_TEXT_CLEAR_SCREEN ClearScreen;
+  EFI_TEXT_SET_CURSOR_POSITION SetCursorPosition;
+  EFI_TEXT_ENABLE_CURSOR EnableCursor;
+  SIMPLE_TEXT_OUTPUT_MODE *Mode;
+} EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL;
+
+#endif // LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL_H
diff --git a/libc/include/llvm-libc-types/EFI_STATUS.h b/libc/include/llvm-libc-types/EFI_STATUS.h
new file mode 100644
index 0000000000000..f7fa6e52381e1
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_STATUS.h
@@ -0,0 +1,16 @@
+//===-- Definition of EFI_STATUS type ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_STATUS_H
+#define LLVM_LIBC_TYPES_EFI_STATUS_H
+
+#include "size_t.h"
+
+typedef size_t EFI_STATUS;
+
+#endif // LLVM_LIBC_TYPES_EFI_STATUS_H
diff --git a/libc/include/llvm-libc-types/EFI_SYSTEM_TABLE.h b/libc/include/llvm-libc-types/EFI_SYSTEM_TABLE.h
new file mode 100644
index 0000000000000..290067ad862e1
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_SYSTEM_TABLE.h
@@ -0,0 +1,65 @@
+//===-- Definition of EFI_SYSTEM_TABLE type -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_SYSTEM_TABLE_H
+#define LLVM_LIBC_TYPES_EFI_SYSTEM_TABLE_H
+
+#include "../llvm-libc-macros/stdint-macros.h"
+#include "EFI_BOOT_SERVICES.h"
+#include "EFI_CONFIGURATION_TABLE.h"
+#include "EFI_HANDLE.h"
+#include "EFI_RUNTIME_SERVICES.h"
+#include "EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h"
+#include "EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h"
+#include "EFI_STATUS.h"
+#include "EFI_TABLE_HEADER.h"
+
+#include "char16_t.h"
+#include "size_t.h"
+
+#define EFI_SYSTEM_TABLE_SIGNATURE 0x5453595320494249
+#define EFI_2_100_SYSTEM_TABLE_REVISION ((2 << 16) | (100))
+#define EFI_2_90_SYSTEM_TABLE_REVISION ((2 << 16) | (90))
+#define EFI_2_80_SYSTEM_TABLE_REVISION ((2 << 16) | (80))
+#define EFI_2_70_SYSTEM_TABLE_REVISION ((2 << 16) | (70))
+#define EFI_2_60_SYSTEM_TABLE_REVISION ((2 << 16) | (60))
+#define EFI_2_50_SYSTEM_TABLE_REVISION ((2 << 16) | (50))
+#define EFI_2_40_SYSTEM_TABLE_REVISION ((2 << 16) | (40))
+#define EFI_2_31_SYSTEM_TABLE_REVISION ((2 << 16) | (31))
+#define EFI_2_30_SYSTEM_TABLE_REVISION ((2 << 16) | (30))
+#define EFI_2_20_SYSTEM_TABLE_REVISION ((2 << 16) | (20))
+#define EFI_2_10_SYSTEM_TABLE_REVISION ((2 << 16) | (10))
+#define EFI_2_00_SYSTEM_TABLE_REVISION ((2 << 16) | (00))
+#define EFI_1_10_SYSTEM_TABLE_REVISION ((1 << 16) | (10))
+#define EFI_1_02_SYSTEM_TABLE_REVISION ((1 << 16) | (02))
+#define EFI_SPECIFICATION_VERSION EFI_SYSTEM_TABLE_REVISION
+#define EFI_SYSTEM_TABLE_REVISION EFI_2_100_SYSTEM_TABLE_REVISION
+
+typedef struct {
+  EFI_TABLE_HEADER Hdr;
+
+  char16_t *FirmwareVendor;
+  uint32_t FirmwareRevision;
+
+  EFI_HANDLE ConsoleInHandle;
+  EFI_SIMPLE_TEXT_INPUT_PROTOCOL *ConIn;
+
+  EFI_HANDLE ConsoleOutHandle;
+  EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *ConOut;
+
+  EFI_HANDLE StandardErrorHandle;
+  EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *StdErr;
+
+  EFI_RUNTIME_SERVICES *RuntimeServices;
+  EFI_BOOT_SERVICES *BootServices;
+
+  size_t NumberOfTableEntries;
+  EFI_CONFIGURATION_TABLE *ConfigurationTable;
+} EFI_SYSTEM_TABLE;
+
+#endif // LLVM_LIBC_TYPES_EFI_SYSTEM_TABLE_H
diff --git a/libc/include/llvm-libc-types/EFI_TABLE_HEADER.h b/libc/include/llvm-libc-types/EFI_TABLE_HEADER.h
new file mode 100644
index 0000000000000..293968ecc4d1b
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_TABLE_HEADER.h
@@ -0,0 +1,22 @@
+//===-- Definition of EFI_TABLE_HEADER type -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_TABLE_HEADER_H
+#define LLVM_LIBC_TYPES_EFI_TABLE_HEADER_H
+
+#include "../llvm-libc-macros/stdint-macros.h"
+
+typedef struct {
+  uint64_t Signature;
+  uint32_t Revision;
+  uint32_t HeaderSize;
+  uint32_t CRC32;
+  uint32_t Reserved;
+} EFI_TABLE_HEADER;
+
+#endif // LLVM_LIBC_TYPES_EFI_TABLE_HEADER_H
diff --git a/libc/include/llvm-libc-types/EFI_TIME.h b/libc/include/llvm-libc-types/EFI_TIME.h
new file mode 100644
index 0000000000000..b0e38b987d44e
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_TIME.h
@@ -0,0 +1,39 @@
+//===-- Definition of EFI_TIME type ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_TIME_H
+#define LLVM_LIBC_TYPES_EFI_TIME_H
+
+#include "../llvm-libc-macros/stdint-macros.h"
+
+typedef struct {
+  uint16_t Year;       // 1900 - 9999
+  uint8_t Month;       // 1 - 12
+  uint8_t Day;         // 1 - 31
+  uint8_t Hour;        // 0 - 23
+  uint8_t Minute;      // 0 - 59
+  uint8_t Second;      // 0 - 59
+  uint8_t Pad1;
+  uint32_t Nanosecond; // 0 - 999,999,999
+  int16_t TimeZone;    // -1440 to 1440, or 2047 (EFI_UNSPECIFIED_TIMEZONE)
+  uint8_t Daylight;    // EFI_TIME_ADJUST_DAYLIGHT | EFI_TIME_IN_DAYLIGHT bits
+  uint8_t Pad2;        // explicit tail byte; the struct stays 16 bytes
+} EFI_TIME;
+
+#define EFI_TIME_ADJUST_DAYLIGHT 0x01
+#define EFI_TIME_IN_DAYLIGHT 0x02
+
+#define EFI_UNSPECIFIED_TIMEZONE 0x07FF
+
+typedef struct {
+  uint32_t Resolution;
+  uint32_t Accuracy;
+  bool SetsToZero;
+} EFI_TIME_CAPABILITIES;
+
+#endif // LLVM_LIBC_TYPES_EFI_TIME_H
diff --git a/libc/include/llvm-libc-types/EFI_TIMER_DELAY.h b/libc/include/llvm-libc-types/EFI_TIMER_DELAY.h
new file mode 100644
index 0000000000000..2a6872c69c8b3
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_TIMER_DELAY.h
@@ -0,0 +1,18 @@
+//===-- Definition of EFI_TIMER_DELAY type --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_TIMER_DELAY_H
+#define LLVM_LIBC_TYPES_EFI_TIMER_DELAY_H
+
+typedef enum { // Enumerators take the implicit values 0, 1, 2 in this order.
+  TimerCancel,
+  TimerPeriodic,
+  TimerRelative,
+} EFI_TIMER_DELAY;
+
+#endif // LLVM_LIBC_TYPES_EFI_TIMER_DELAY_H
diff --git a/libc/include/llvm-libc-types/EFI_TPL.h b/libc/include/llvm-libc-types/EFI_TPL.h
new file mode 100644
index 0000000000000..8361ccfacd6f5
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_TPL.h
@@ -0,0 +1,21 @@
+//===-- Definition of EFI_TPL type ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_EFI_TPL_H
+#define LLVM_LIBC_TYPES_EFI_TPL_H
+
+#include "size_t.h"
+
+typedef size_t EFI_TPL; // NOTE(review): presumably UEFI's task priority level.
+
+#define TPL_APPLICATION 4 // TPL_* values increase from here to TPL_HIGH_LEVEL.
+#define TPL_CALLBACK 8
+#define TPL_NOTIFY 16
+#define TPL_HIGH_LEVEL 31
+
+#endif // LLVM_LIBC_TYPES_EFI_TPL_H
diff --git a/libc/include/llvm-libc-types/EFI_VIRTUAL_ADDRESS.h b/libc/include/llvm-libc-types/EFI_VIRTUAL_ADDRESS.h
new file mode 100644
index 0000000000000..46cbec734dadc
--- /dev/null
+++ b/libc/include/llvm-libc-types/EFI_VIRTUAL_ADDRESS.h
@@ -0,0 +1,16 @@
+//===-- Definition of EFI_VIRTUAL_ADDRESS type ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_VIRTUAL_ADDRESS_H +#define LLVM_LIBC_TYPES_EFI_VIRTUAL_ADDRESS_H + +#include "../llvm-libc-macros/stdint-macros.h" + +typedef uint64_t EFI_VIRTUAL_ADDRESS; + +#endif // LLVM_LIBC_TYPES_EFI_VIRTUAL_ADDRESS_H diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h index 4e03723a32854..e3c790998e902 100644 --- a/libcxx/include/__algorithm/simd_utils.h +++ b/libcxx/include/__algorithm/simd_utils.h @@ -15,8 +15,6 @@ #include <__bit/countr.h> #include <__config> #include <__cstddef/size_t.h> -#include <__type_traits/is_arithmetic.h> -#include <__type_traits/is_same.h> #include <__utility/integer_sequence.h> #include @@ -78,7 +76,7 @@ using __get_as_integer_type_t _LIBCPP_NODEBUG = typename __get_as_integer_type_i # if defined(__AVX__) || defined(__MVS__) template inline constexpr size_t __native_vector_size = 32 / sizeof(_Tp); -# elif defined(__SSE__) || defined(__ARM_NEON__) +# elif defined(__SSE__) || defined(__ARM_NEON) template inline constexpr size_t __native_vector_size = 16 / sizeof(_Tp); # elif defined(__MMX__) diff --git a/libcxx/include/__locale_dir/support/linux.h b/libcxx/include/__locale_dir/support/linux.h index f1662c0112603..fa0b03c646a2a 100644 --- a/libcxx/include/__locale_dir/support/linux.h +++ b/libcxx/include/__locale_dir/support/linux.h @@ -95,12 +95,22 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __ } inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { +#if !_LIBCPP_HAS_MUSL_LIBC return ::strtoll_l(__nptr, __endptr, __base, __loc); +#else + (void)__loc; + return ::strtoll(__nptr, __endptr, __base); +#endif } inline _LIBCPP_HIDE_FROM_ABI unsigned long long __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { 
+#if !_LIBCPP_HAS_MUSL_LIBC return ::strtoull_l(__nptr, __endptr, __base, __loc); +#else + (void)__loc; + return ::strtoull(__nptr, __endptr, __base); +#endif } // diff --git a/libcxx/test/libcxx/xopen_source.gen.py b/libcxx/test/libcxx/xopen_source.gen.py index 3f2686483730a..d4a3651181ca7 100644 --- a/libcxx/test/libcxx/xopen_source.gen.py +++ b/libcxx/test/libcxx/xopen_source.gen.py @@ -43,6 +43,9 @@ // recent value of _XOPEN_SOURCE. // UNSUPPORTED: LIBCXX-AIX-FIXME +// This test fails on FreeBSD for an unknown reason. +// UNSUPPORTED: LIBCXX-FREEBSD-FIXME + {lit_header_restrictions.get(header, '')} {lit_header_undeprecations.get(header, '')} diff --git a/libcxx/test/std/input.output/iostream.format/std.manip/setfill_wchar_max.pass.cpp b/libcxx/test/std/input.output/iostream.format/std.manip/setfill_wchar_max.pass.cpp index 9d4126153cc23..82842a75827ac 100644 --- a/libcxx/test/std/input.output/iostream.format/std.manip/setfill_wchar_max.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/std.manip/setfill_wchar_max.pass.cpp @@ -16,7 +16,7 @@ // XFAIL: target={{.*}}-windows{{.*}} && libcpp-abi-version=1 // XFAIL: target=armv{{7|8}}{{l?}}{{.*}}-linux-gnueabihf && libcpp-abi-version=1 -// XFAIL: target=aarch64{{.*}}-linux-gnu && libcpp-abi-version=1 +// XFAIL: target=aarch64{{.*}}-linux{{.*}} && libcpp-abi-version=1 #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp index 57b8c13aa3c14..879597b2f80fd 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp @@ -18,7 +18,7 @@ // TODO: investigation needed // TODO(netbsd): incomplete support for locales -// XFAIL: target={{.*}}-linux-gnu{{.*}}, netbsd, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, netbsd, freebsd // REQUIRES: locale.cs_CZ.ISO8859-2 #include diff --git a/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp 
b/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp index 430d35fe739e5..59fb1c48e15d3 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp index b512fa9b5fcf8..0a966759eac3b 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp index 472dc19680263..87ff1e5b6ef12 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp index 9125df404b1de..c4b211e613bec 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // 
TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp index f85b6a40ce129..56cf2e6a61ff3 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp index aa9441cb3e58f..4655a5c2e0ee6 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp index 9746e45f29da5..7bc8a537ca228 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp b/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp index 178979d5b9ce8..3cbbaef9c81b5 100644 --- 
a/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp +++ b/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp @@ -23,7 +23,7 @@ // lookup_collatename(ForwardIterator first, ForwardIterator last) const; // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}} +// XFAIL: target={{.*}}-linux{{.*}} #include #include diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index ab8c2fcaf034b..6615c3353ffe4 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -296,7 +296,7 @@ class DarwinImage(symbolication.Image): except: dsymForUUIDBinary = "" - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") def __init__( self, text_addr_lo, text_addr_hi, identifier, version, uuid, path, verbose @@ -501,7 +501,7 @@ def find_image_with_identifier(self, identifier): for image in self.images: if image.identifier == identifier: return image - regex_text = "^.*\.%s$" % (re.escape(identifier)) + regex_text = r"^.*\.%s$" % (re.escape(identifier)) regex = re.compile(regex_text) for image in self.images: if regex.match(image.identifier): @@ -925,7 +925,7 @@ def get(cls): version = r"(?:" + super().version + r"\s+)?" 
address = r"(0x[0-9a-fA-F]{4,})" # 4 digits or more - symbol = """ + symbol = r""" (?: [ ]+ (?P.+) @@ -1095,7 +1095,7 @@ def parse_normal(self, line): self.crashlog.process_identifier = line[11:].strip() elif line.startswith("Version:"): version_string = line[8:].strip() - matched_pair = re.search("(.+)\((.+)\)", version_string) + matched_pair = re.search(r"(.+)\((.+)\)", version_string) if matched_pair: self.crashlog.process_version = matched_pair.group(1) self.crashlog.process_compatability_version = matched_pair.group(2) diff --git a/lldb/examples/python/delta.py b/lldb/examples/python/delta.py index eeb3c177cfa90..f847b95ab119f 100755 --- a/lldb/examples/python/delta.py +++ b/lldb/examples/python/delta.py @@ -99,7 +99,7 @@ def parse_log_file(file, options): print("# Log file: '%s'" % file) print("#----------------------------------------------------------------------") - timestamp_regex = re.compile("(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$") + timestamp_regex = re.compile(r"(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$") base_time = 0.0 last_time = 0.0 diff --git a/lldb/examples/python/gdbremote.py b/lldb/examples/python/gdbremote.py index 40ee15853fdb2..0bbfc1a0f1eed 100755 --- a/lldb/examples/python/gdbremote.py +++ b/lldb/examples/python/gdbremote.py @@ -1537,12 +1537,12 @@ def parse_gdb_log(file, options): a long time during a preset set of debugger commands.""" tricky_commands = ["qRegisterInfo"] - timestamp_regex = re.compile("(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$") + timestamp_regex = re.compile(r"(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$") packet_name_regex = re.compile("([A-Za-z_]+)[^a-z]") packet_transmit_name_regex = re.compile( "(?Psend|read) packet: (?P.*)" ) - packet_contents_name_regex = re.compile("\$([^#]*)#[0-9a-fA-F]{2}") + packet_contents_name_regex = re.compile(r"\$([^#]*)#[0-9a-fA-F]{2}") packet_checksum_regex = re.compile(".*#[0-9a-fA-F]{2}$") packet_names_regex_str = "(" + "|".join(gdb_remote_commands.keys()) + ")(.*)" packet_names_regex = 
re.compile(packet_names_regex_str) diff --git a/lldb/examples/python/jump.py b/lldb/examples/python/jump.py index e086df5fd1528..8d52bd9af43f6 100644 --- a/lldb/examples/python/jump.py +++ b/lldb/examples/python/jump.py @@ -38,7 +38,7 @@ def parse_linespec(linespec, frame, result): ) if not matched: - mo = re.match("^\+([0-9]+)$", linespec) + mo = re.match(r"^\+([0-9]+)$", linespec) if mo is not None: matched = True # print "Matched +" @@ -54,7 +54,7 @@ def parse_linespec(linespec, frame, result): ) if not matched: - mo = re.match("^\-([0-9]+)$", linespec) + mo = re.match(r"^\-([0-9]+)$", linespec) if mo is not None: matched = True # print "Matched -" @@ -79,7 +79,7 @@ def parse_linespec(linespec, frame, result): breakpoint = target.BreakpointCreateByLocation(file_name, line_number) if not matched: - mo = re.match("\*((0x)?([0-9a-f]+))$", linespec) + mo = re.match(r"\*((0x)?([0-9a-f]+))$", linespec) if mo is not None: matched = True # print "Matched " diff --git a/lldb/examples/python/performance.py b/lldb/examples/python/performance.py index 869a0b061cf85..b86b5a52522e0 100755 --- a/lldb/examples/python/performance.py +++ b/lldb/examples/python/performance.py @@ -346,7 +346,7 @@ def __init__(self, pid): def Measure(self): output = subprocess.getoutput(self.command).split("\n")[-1] - values = re.split("[-+\s]+", output) + values = re.split(r"[-+\s]+", output) for idx, stat in enumerate(values): multiplier = 1 if stat: diff --git a/lldb/examples/python/symbolication.py b/lldb/examples/python/symbolication.py index f6dcc8b9a7943..b16745ee963c9 100755 --- a/lldb/examples/python/symbolication.py +++ b/lldb/examples/python/symbolication.py @@ -177,9 +177,9 @@ class Section: """Class that represents an load address range""" sect_info_regex = re.compile("(?P[^=]+)=(?P.*)") - addr_regex = re.compile("^\s*(?P0x[0-9A-Fa-f]+)\s*$") + addr_regex = re.compile(r"^\s*(?P0x[0-9A-Fa-f]+)\s*$") range_regex = re.compile( - "^\s*(?P0x[0-9A-Fa-f]+)\s*(?P[-+])\s*(?P0x[0-9A-Fa-f]+)\s*$" 
+ r"^\s*(?P0x[0-9A-Fa-f]+)\s*(?P[-+])\s*(?P0x[0-9A-Fa-f]+)\s*$" ) def __init__(self, start_addr=None, end_addr=None, name=None): @@ -557,7 +557,7 @@ def find_images_with_identifier(self, identifier): if image.identifier == identifier: images.append(image) if len(images) == 0: - regex_text = "^.*\.%s$" % (re.escape(identifier)) + regex_text = r"^.*\.%s$" % (re.escape(identifier)) regex = re.compile(regex_text) for image in self.images: if regex.match(image.identifier): diff --git a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py index 998a080565b6b..3279e1fd39f8c 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py +++ b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py @@ -104,4 +104,4 @@ def cursor_forward_escape_seq(self, chars_to_move): Returns the escape sequence to move the cursor forward/right by a certain amount of characters. """ - return b"\x1b\[" + str(chars_to_move).encode("utf-8") + b"C" + return b"\x1b\\[" + str(chars_to_move).encode("utf-8") + b"C" diff --git a/lldb/packages/Python/lldbsuite/test/test_categories.py b/lldb/packages/Python/lldbsuite/test/test_categories.py index 036bda9c957d1..b585f695adeab 100644 --- a/lldb/packages/Python/lldbsuite/test/test_categories.py +++ b/lldb/packages/Python/lldbsuite/test/test_categories.py @@ -31,7 +31,7 @@ "libc++": "Test for libc++ data formatters", "libstdcxx": "Test for libstdcxx data formatters", "lldb-server": "Tests related to lldb-server", - "lldb-dap": "Tests for the Debug Adaptor Protocol with lldb-dap", + "lldb-dap": "Tests for the Debug Adapter Protocol with lldb-dap", "llgs": "Tests for the gdb-server functionality of lldb-server", "pexpect": "Tests requiring the pexpect library to be available", "objc": "Tests related to the Objective-C programming language support", diff --git a/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py b/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py index 
07c17993bc878..8ab219a92d99d 100644 --- a/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py +++ b/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py @@ -91,7 +91,7 @@ def timeout_to_seconds(timeout): class ProcessHelper(object): - """Provides an interface for accessing process-related functionality. + r"""Provides an interface for accessing process-related functionality. This class provides a factory method that gives the caller a platform-specific implementation instance of the class. diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 391378cf027bc..9471594b66012 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -76,7 +76,7 @@ def read_packet(f, verbose=False, trace_file=None): if verbose: print('json: "%s"' % (json_str)) if trace_file: - trace_file.write("from adaptor:\n%s\n" % (json_str)) + trace_file.write("from adapter:\n%s\n" % (json_str)) # Decode the JSON bytes into a python dictionary return json.loads(json_str) @@ -259,7 +259,7 @@ def handle_recv_packet(self, packet): def send_packet(self, command_dict, set_sequence=True): """Take the "command_dict" python dictionary and encode it as a JSON string and send the contents as a packet to the VSCode debug - adaptor""" + adapter""" # Set the sequence ID for this command automatically if set_sequence: command_dict["seq"] = self.sequence @@ -267,7 +267,7 @@ def send_packet(self, command_dict, set_sequence=True): # Encode our command dictionary as a JSON string json_str = json.dumps(command_dict, separators=(",", ":")) if self.trace_file: - self.trace_file.write("to adaptor:\n%s\n" % (json_str)) + self.trace_file.write("to adapter:\n%s\n" % (json_str)) length = len(json_str) if length > 0: # Send the encoded JSON packet and flush the 'send' file @@ -275,7 +275,7 @@ def 
send_packet(self, command_dict, set_sequence=True): self.send.flush() def recv_packet(self, filter_type=None, filter_event=None, timeout=None): - """Get a JSON packet from the VSCode debug adaptor. This function + """Get a JSON packet from the VSCode debug adapter. This function assumes a thread that reads packets is running and will deliver any received packets by calling handle_recv_packet(...). This function will wait for the packet to arrive and return it when @@ -1184,7 +1184,7 @@ def request_setInstructionBreakpoints(self, memory_reference=[]): return self.send_recv(command_dict) -class DebugAdaptorServer(DebugCommunication): +class DebugAdapterServer(DebugCommunication): def __init__( self, executable=None, @@ -1196,7 +1196,7 @@ def __init__( self.process = None self.connection = None if executable is not None: - process, connection = DebugAdaptorServer.launch( + process, connection = DebugAdapterServer.launch( executable=executable, connection=connection, env=env, log_file=log_file ) self.process = process @@ -1224,12 +1224,12 @@ def __init__( @classmethod def launch(cls, /, executable, env=None, log_file=None, connection=None): - adaptor_env = os.environ.copy() + adapter_env = os.environ.copy() if env is not None: - adaptor_env.update(env) + adapter_env.update(env) if log_file: - adaptor_env["LLDBDAP_LOG"] = log_file + adapter_env["LLDBDAP_LOG"] = log_file args = [executable] if connection is not None: @@ -1241,7 +1241,7 @@ def launch(cls, /, executable, env=None, log_file=None, connection=None): stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env=adaptor_env, + env=adapter_env, ) if connection is None: @@ -1271,7 +1271,7 @@ def get_pid(self): return -1 def terminate(self): - super(DebugAdaptorServer, self).terminate() + super(DebugAdapterServer, self).terminate() if self.process is not None: self.process.terminate() self.process.wait() @@ -1347,7 +1347,7 @@ def run_vscode(dbg, args, options): def main(): parser = 
optparse.OptionParser( description=( - "A testing framework for the Visual Studio Code Debug Adaptor protocol" + "A testing framework for the Visual Studio Code Debug Adapter protocol" ) ) @@ -1357,7 +1357,7 @@ def main(): dest="vscode_path", help=( "The path to the command line program that implements the " - "Visual Studio Code Debug Adaptor protocol." + "Visual Studio Code Debug Adapter protocol." ), default=None, ) @@ -1407,7 +1407,7 @@ def main(): dest="replay", help=( "Specify a file containing a packet log to replay with the " - "current Visual Studio Code Debug Adaptor executable." + "current Visual Studio Code Debug Adapter executable." ), default=None, ) @@ -1418,7 +1418,7 @@ def main(): action="store_true", dest="debug", default=False, - help="Pause waiting for a debugger to attach to the debug adaptor", + help="Pause waiting for a debugger to attach to the debug adapter", ) parser.add_option( @@ -1581,11 +1581,11 @@ def main(): if options.vscode_path is None and options.connection is None: print( "error: must either specify a path to a Visual Studio Code " - "Debug Adaptor vscode executable path using the --vscode " + "Debug Adapter vscode executable path using the --vscode " "option, or using the --connection option" ) return - dbg = DebugAdaptorServer( + dbg = DebugAdapterServer( executable=options.vscode_path, connection=options.connection ) if options.debug: diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index 8b0f74ba389c3..70b04b051e0ec 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -14,13 +14,13 @@ class DAPTestCaseBase(TestBase): timeoutval = 10 * (10 if ("ASAN_OPTIONS" in os.environ) else 1) NO_DEBUG_INFO_TESTCASE = True - def create_debug_adaptor(self, lldbDAPEnv=None, connection=None): - """Create the Visual Studio Code 
debug adaptor""" + def create_debug_adapter(self, lldbDAPEnv=None, connection=None): + """Create the Visual Studio Code debug adapter""" self.assertTrue( is_exe(self.lldbDAPExec), "lldb-dap must exist and be executable" ) log_file_path = self.getBuildArtifact("dap.txt") - self.dap_server = dap_server.DebugAdaptorServer( + self.dap_server = dap_server.DebugAdapterServer( executable=self.lldbDAPExec, connection=connection, init_commands=self.setUpCommands(), @@ -28,9 +28,9 @@ def create_debug_adaptor(self, lldbDAPEnv=None, connection=None): env=lldbDAPEnv, ) - def build_and_create_debug_adaptor(self, lldbDAPEnv=None): + def build_and_create_debug_adapter(self, lldbDAPEnv=None): self.build() - self.create_debug_adaptor(lldbDAPEnv) + self.create_debug_adapter(lldbDAPEnv) def set_source_breakpoints(self, source_path, lines, data=None): """Sets source breakpoints and returns an array of strings containing @@ -324,11 +324,11 @@ def attach( gdbRemotePort=None, gdbRemoteHostname=None, ): - """Build the default Makefile target, create the DAP debug adaptor, + """Build the default Makefile target, create the DAP debug adapter, and attach to the process. """ - # Make sure we disconnect and terminate the DAP debug adaptor even + # Make sure we disconnect and terminate the DAP debug adapter even # if we throw an exception during the test case. def cleanup(): if disconnectAutomatically: @@ -479,10 +479,10 @@ def build_and_launch( launchCommands=None, expectFailure=False, ): - """Build the default Makefile target, create the DAP debug adaptor, + """Build the default Makefile target, create the DAP debug adapter, and launch the process. 
""" - self.build_and_create_debug_adaptor(lldbDAPEnv) + self.build_and_create_debug_adapter(lldbDAPEnv) self.assertTrue(os.path.exists(program), "executable must exist") return self.launch( diff --git a/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp b/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp index 280ec5ba37100..25803c9799ce4 100644 --- a/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp +++ b/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp @@ -102,12 +102,7 @@ static Status PushToLinuxGuardedControlStack(addr_t return_addr, size_t wrote = thread.GetProcess()->WriteMemory(gcspr_el0, &return_addr, sizeof(return_addr), error); if ((wrote != sizeof(return_addr) || error.Fail())) { - // When PrepareTrivialCall fails, the register context is not restored, - // unlike when an expression fails to execute. This is arguably a bug, - // see https://github.com/llvm/llvm-project/issues/124269. - // For now we are handling this here specifically. We can assume this - // write will work as the one to decrement the register did. - reg_ctx->WriteRegisterFromUnsigned(gcspr_el0_info, gcspr_el0 + 8); + // gcspr_el0 will be restored by the ThreadPlan's DoTakedown. return Status("Failed to write new Guarded Control Stack entry."); } @@ -150,8 +145,6 @@ bool ABISysV_arm64::PrepareTrivialCall(Thread &thread, addr_t sp, if (args.size() > 8) return false; - // Do this first, as it's got the most chance of failing (though still very - // low). 
if (GetProcessSP()->GetTarget().GetArchitecture().GetTriple().isOSLinux()) { Status err = PushToLinuxGuardedControlStack(return_addr, reg_ctx, thread); // If we could not manage the GCS, the expression will certainly fail, diff --git a/lldb/source/Target/ThreadPlanCallFunction.cpp b/lldb/source/Target/ThreadPlanCallFunction.cpp index 50dcb66b9719f..218111d4faf60 100644 --- a/lldb/source/Target/ThreadPlanCallFunction.cpp +++ b/lldb/source/Target/ThreadPlanCallFunction.cpp @@ -174,8 +174,20 @@ void ThreadPlanCallFunction::ReportRegisterState(const char *message) { void ThreadPlanCallFunction::DoTakedown(bool success) { Log *log = GetLog(LLDBLog::Step); + Thread &thread = GetThread(); if (!m_valid) { + // If ConstructorSetup was successful but PrepareTrivialCall was not, + // we will have a saved register state and potentially modified registers. + // Restore those. + if (m_stored_thread_state.register_backup_sp) + if (!thread.RestoreRegisterStateFromCheckpoint(m_stored_thread_state)) + LLDB_LOGF( + log, + "ThreadPlanCallFunction(%p): Failed to restore register state from " + "invalid plan that contained a saved register state.", + static_cast(this)); + // Don't call DoTakedown if we were never valid to begin with. 
LLDB_LOGF(log, "ThreadPlanCallFunction(%p): Log called on " @@ -185,7 +197,6 @@ void ThreadPlanCallFunction::DoTakedown(bool success) { } if (!m_takedown_done) { - Thread &thread = GetThread(); if (success) { SetReturnValue(); } diff --git a/lldb/test/API/commands/command/backticks/TestBackticksInAlias.py b/lldb/test/API/commands/command/backticks/TestBackticksInAlias.py index c31a08ac00182..2cb8d225d6d07 100644 --- a/lldb/test/API/commands/command/backticks/TestBackticksInAlias.py +++ b/lldb/test/API/commands/command/backticks/TestBackticksInAlias.py @@ -20,7 +20,7 @@ def test_backticks_in_alias(self): interp = self.dbg.GetCommandInterpreter() result = lldb.SBCommandReturnObject() interp.HandleCommand( - "command alias _test-argv-cmd expression -Z \`argc\` -- argv", result + r"command alias _test-argv-cmd expression -Z \`argc\` -- argv", result ) self.assertCommandReturn(result, "Made the alias") interp.HandleCommand("_test-argv-cmd", result) @@ -28,7 +28,7 @@ def test_backticks_in_alias(self): # Now try a harder case where we create this using an alias: interp.HandleCommand( - "command alias _test-argv-parray-cmd parray \`argc\` argv", result + r"command alias _test-argv-parray-cmd parray \`argc\` argv", result ) self.assertCommandReturn(result, "Made the alias") interp.HandleCommand("_test-argv-parray-cmd", result) diff --git a/lldb/test/API/commands/expression/memory-allocation/TestMemoryAllocSettings.py b/lldb/test/API/commands/expression/memory-allocation/TestMemoryAllocSettings.py index d27f07717affb..a82141a0792f2 100644 --- a/lldb/test/API/commands/expression/memory-allocation/TestMemoryAllocSettings.py +++ b/lldb/test/API/commands/expression/memory-allocation/TestMemoryAllocSettings.py @@ -30,7 +30,7 @@ def test(self): alloc0 = re.search("^.*IRMemoryMap::Malloc.+?0xdead0000.*$", log, re.MULTILINE) # Malloc adds additional bytes to allocation size, hence 10007 alloc1 = re.search( - "^.*IRMemoryMap::Malloc\s*?\(10007.+?0xdead1000.*$", log, re.MULTILINE + 
r"^.*IRMemoryMap::Malloc\s*?\(10007.+?0xdead1000.*$", log, re.MULTILINE ) self.assertTrue(alloc0, "Couldn't find an allocation at a given address.") self.assertTrue( diff --git a/lldb/test/API/commands/expression/test/TestExprs.py b/lldb/test/API/commands/expression/test/TestExprs.py index 41faf07f8cb44..17fd952130ee7 100644 --- a/lldb/test/API/commands/expression/test/TestExprs.py +++ b/lldb/test/API/commands/expression/test/TestExprs.py @@ -50,7 +50,7 @@ def build_and_run(self): def test_floating_point_expr_commands(self): self.build_and_run() - self.expect("expression 2.234f", patterns=["\(float\) \$.* = 2\.234"]) + self.expect("expression 2.234f", patterns=[r"\(float\) \$.* = 2\.234"]) # (float) $2 = 2.234 def test_many_expr_commands(self): diff --git a/lldb/test/API/commands/gui/expand-threads-tree/TestGuiExpandThreadsTree.py b/lldb/test/API/commands/gui/expand-threads-tree/TestGuiExpandThreadsTree.py index 3bb45521747d8..69aa674f6ae5d 100644 --- a/lldb/test/API/commands/gui/expand-threads-tree/TestGuiExpandThreadsTree.py +++ b/lldb/test/API/commands/gui/expand-threads-tree/TestGuiExpandThreadsTree.py @@ -48,7 +48,7 @@ def test_gui(self): self.child.expect_exact("Threads") # The main thread should be expanded. - self.child.expect("#\d+: main") + self.child.expect(r"#\d+: main") # Quit the GUI self.child.send(escape_key) diff --git a/lldb/test/API/commands/help/TestHelp.py b/lldb/test/API/commands/help/TestHelp.py index f0f5bcb321801..6aaff17fa4ea6 100644 --- a/lldb/test/API/commands/help/TestHelp.py +++ b/lldb/test/API/commands/help/TestHelp.py @@ -349,13 +349,13 @@ def test_help_show_tags(self): self.expect( "help memory read", patterns=[ - "--show-tags\n\s+Include memory tags in output " - "\(does not apply to binary output\)." + "--show-tags\n\\s+Include memory tags in output " + "\\(does not apply to binary output\\)." 
], ) self.expect( "help memory find", - patterns=["--show-tags\n\s+Include memory tags in output."], + patterns=["--show-tags\n\\s+Include memory tags in output."], ) @no_debug_info_test diff --git a/lldb/test/API/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py b/lldb/test/API/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py index fcf61c9775c63..a7f8b38649b22 100644 --- a/lldb/test/API/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py +++ b/lldb/test/API/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py @@ -93,7 +93,7 @@ def test(self): self.runCmd("process kill") - self.runCmd("process launch -X true -w %s -- foo\ bar" % (self.getBuildDir())) + self.runCmd(r"process launch -X true -w %s -- foo\ bar" % (self.getBuildDir())) process = self.process() diff --git a/lldb/test/API/commands/register/register/TestRegistersUnavailable.py b/lldb/test/API/commands/register/register/TestRegistersUnavailable.py index abd3aeace8969..0ccccd2f09712 100644 --- a/lldb/test/API/commands/register/register/TestRegistersUnavailable.py +++ b/lldb/test/API/commands/register/register/TestRegistersUnavailable.py @@ -48,12 +48,12 @@ def test_unavailable_registers(self): "register read --all", patterns=[ "(?sm)^general purpose registers:\n" - "^\s+rdx = 0x5555555555555555\n" + "^\\s+rdx = 0x5555555555555555\n" ".*" "^3 registers were unavailable.\n" "\n" "^supplementary registers:\n" - "^\s+edx = 0x55555555\n" + "^\\s+edx = 0x55555555\n" ".*" "^12 registers were unavailable." 
], diff --git a/lldb/test/API/commands/register/register/register_command/TestRegisters.py b/lldb/test/API/commands/register/register/register_command/TestRegisters.py index 5bf7aa5dee9c4..100bcceba2812 100644 --- a/lldb/test/API/commands/register/register/register_command/TestRegisters.py +++ b/lldb/test/API/commands/register/register/register_command/TestRegisters.py @@ -662,14 +662,14 @@ def test_register_read_fields(self): # N/Z/C/V bits will always be present, so check only for those. self.expect( "register read cpsr", - patterns=["= \(N = [0|1], Z = [0|1], C = [0|1], V = [0|1]"], + patterns=[r"= \(N = [0|1], Z = [0|1], C = [0|1], V = [0|1]"], ) self.expect( - "register read fpsr", patterns=["= \(QC = [0|1], IDC = [0|1], IXC = [0|1]"] + "register read fpsr", patterns=[r"= \(QC = [0|1], IDC = [0|1], IXC = [0|1]"] ) # AHP/DN/FZ always present, others may vary. self.expect( - "register read fpcr", patterns=["= \(AHP = [0|1], DN = [0|1], FZ = [0|1]"] + "register read fpcr", patterns=[r"= \(AHP = [0|1], DN = [0|1], FZ = [0|1]"] ) # Should get enumerator descriptions for RMode. 
diff --git a/lldb/test/API/commands/settings/TestSettings.py b/lldb/test/API/commands/settings/TestSettings.py index 2dd813f6b155b..d36e08875919a 100644 --- a/lldb/test/API/commands/settings/TestSettings.py +++ b/lldb/test/API/commands/settings/TestSettings.py @@ -186,13 +186,13 @@ def cleanup(): self.addTearDownHook(cleanup) self.runCmd("settings show frame-format") - m = re.match('^frame-format \(format-string\) = "(.*)"$', self.res.GetOutput()) + m = re.match(r'^frame-format \(format-string\) = "(.*)"$', self.res.GetOutput()) self.assertTrue(m, "Bad settings string") self.format_string = m.group(1) # Change the default format to print function.name rather than # function.name-with-args - format_string = "frame #${frame.index}: ${frame.pc}{ ${module.file.basename}\`${function.name}{${function.pc-offset}}}{ at ${line.file.fullpath}:${line.number}}{, lang=${language}}\n" + format_string = "frame #${frame.index}: ${frame.pc}{ ${module.file.basename}\\`${function.name}{${function.pc-offset}}}{ at ${line.file.fullpath}:${line.number}}{, lang=${language}}\n" self.runCmd("settings set frame-format %s" % format_string) # Immediately test the setting. @@ -724,7 +724,7 @@ def test_settings_with_trailing_whitespace(self): ) self.runCmd("settings set target.run-args 1 2 3") # Set to known value # Set to new value with trailing whitespaces - self.runCmd("settings set target.run-args 3 \ \ ") + self.runCmd(r"settings set target.run-args 3 \ \ ") self.expect( "settings show target.run-args", SETTING_MSG("target.run-args"), @@ -846,11 +846,11 @@ def test_settings_clear_all(self): # Check that settings have their default values after clearing. 
self.expect( "settings show target.env-vars", - patterns=["^target.env-vars \(dictionary of strings\) =\s*$"], + patterns=[r"^target.env-vars \(dictionary of strings\) =\s*$"], ) self.expect( "settings show target.run-args", - patterns=["^target.run-args \(arguments\) =\s*$"], + patterns=[r"^target.run-args \(arguments\) =\s*$"], ) self.expect("settings show auto-confirm", substrs=["false"]) self.expect("settings show tab-size", substrs=["2"]) @@ -947,7 +947,7 @@ def test_experimental_settings(self): # showing & setting an undefined .experimental. setting should generate no errors. self.expect( "settings show target.experimental.setting-which-does-not-exist", - patterns=["^\s$"], + patterns=[r"^\s$"], error=False, ) self.expect( diff --git a/lldb/test/API/commands/target/basic/TestTargetCommand.py b/lldb/test/API/commands/target/basic/TestTargetCommand.py index 953b59d729bfa..d91a3e0edb715 100644 --- a/lldb/test/API/commands/target/basic/TestTargetCommand.py +++ b/lldb/test/API/commands/target/basic/TestTargetCommand.py @@ -74,7 +74,7 @@ def do_target_command(self): # Find the largest index of the existing list. 
import re - pattern = re.compile("target #(\d+):") + pattern = re.compile(r"target #(\d+):") for line in reversed(output.split(os.linesep)): match = pattern.search(line) if match: diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py index 05c72945b1439..13d12e3686a17 100644 --- a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py +++ b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py @@ -94,11 +94,11 @@ def test_dwos_loaded_table_output(self): self.expect( "target modules dump separate-debug-info", patterns=[ - "Symbol file: .*?a\.out", + r"Symbol file: .*?a\.out", 'Type: "dwo"', - "Dwo ID\s+Err\s+Dwo Path", - "0x[a-zA-Z0-9]{16}\s+.*main\.dwo", - "0x[a-zA-Z0-9]{16}\s+.*foo\.dwo", + r"Dwo ID\s+Err\s+Dwo Path", + r"0x[a-zA-Z0-9]{16}\s+.*main\.dwo", + r"0x[a-zA-Z0-9]{16}\s+.*foo\.dwo", ], ) @@ -118,11 +118,11 @@ def test_dwos_not_loaded_table_output(self): self.expect( "target modules dump separate-debug-info", patterns=[ - "Symbol file: .*?a\.out", + r"Symbol file: .*?a\.out", 'Type: "dwo"', - "Dwo ID\s+Err\s+Dwo Path", - "0x[a-zA-Z0-9]{16}\s+E\s+.*main\.dwo", - "0x[a-zA-Z0-9]{16}\s+E\s+.*foo\.dwo", + r"Dwo ID\s+Err\s+Dwo Path", + r"0x[a-zA-Z0-9]{16}\s+E\s+.*main\.dwo", + r"0x[a-zA-Z0-9]{16}\s+E\s+.*foo\.dwo", ], ) diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py b/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py index 06dc823459184..4e7560338b1d4 100644 --- a/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py +++ b/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py @@ -93,11 +93,11 @@ def test_shows_oso_loaded_table_output(self): self.expect( "target modules dump separate-debug-info", patterns=[ - "Symbol file: .*?a\.out", + r"Symbol file: .*?a\.out", 'Type: "oso"', - "Mod Time\s+Err\s+Oso Path", - 
"0x[a-zA-Z0-9]{16}\s+.*main\.o", - "0x[a-zA-Z0-9]{16}\s+.*foo\.o", + r"Mod Time\s+Err\s+Oso Path", + r"0x[a-zA-Z0-9]{16}\s+.*main\.o", + r"0x[a-zA-Z0-9]{16}\s+.*foo\.o", ], ) @@ -119,11 +119,11 @@ def test_shows_oso_not_loaded_table_output(self): self.expect( "target modules dump separate-debug-info", patterns=[ - "Symbol file: .*?a\.out", + r"Symbol file: .*?a\.out", 'Type: "oso"', - "Mod Time\s+Err\s+Oso Path", - "0x[a-zA-Z0-9]{16}\s+E\s+.*main\.o", - "0x[a-zA-Z0-9]{16}\s+E\s+.*foo\.o", + r"Mod Time\s+Err\s+Oso Path", + r"0x[a-zA-Z0-9]{16}\s+E\s+.*main\.o", + r"0x[a-zA-Z0-9]{16}\s+E\s+.*foo\.o", ], ) diff --git a/lldb/test/API/commands/trace/TestTraceDumpInfo.py b/lldb/test/API/commands/trace/TestTraceDumpInfo.py index 3f67475d631dd..52449631f6aa9 100644 --- a/lldb/test/API/commands/trace/TestTraceDumpInfo.py +++ b/lldb/test/API/commands/trace/TestTraceDumpInfo.py @@ -64,7 +64,7 @@ def testDumpRawTraceSize(self): hardware disabled tracing: 4 trace synchronization point: 1""", ], - patterns=["Decoding instructions: \d.\d\ds"], + patterns=[r"Decoding instructions: \d.\d\ds"], ) def testDumpRawTraceSizeJSON(self): diff --git a/lldb/test/API/commands/trace/TestTraceEvents.py b/lldb/test/API/commands/trace/TestTraceEvents.py index 52f6241456b76..c20bcc247105b 100644 --- a/lldb/test/API/commands/trace/TestTraceEvents.py +++ b/lldb/test/API/commands/trace/TestTraceEvents.py @@ -68,7 +68,7 @@ def testPauseEvents(self): self.expect( "thread trace dump instructions -e -f", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* 0: \(event\) trace synchronization point \[offset \= 0x0xec0\] 1: \(event\) hardware disabled tracing a.out`main \+ 23 at main.cpp:12 @@ -102,7 +102,7 @@ def testPauseEvents(self): self.expect( "thread trace dump instructions -e --id 18", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`symbol stub for: foo\(\) 18: {ADDRESS_REGEX} jmpq .* 17: \(event\) software disabled tracing diff --git 
a/lldb/test/API/commands/trace/TestTraceStartStop.py b/lldb/test/API/commands/trace/TestTraceStartStop.py index 134cf13096edb..5add321b4c83f 100644 --- a/lldb/test/API/commands/trace/TestTraceStartStop.py +++ b/lldb/test/API/commands/trace/TestTraceStartStop.py @@ -244,7 +244,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions -f", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 4 at main.cpp:2 2: {ADDRESS_REGEX} movl""" ], @@ -255,7 +255,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions -f", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 4 at main.cpp:2 2: {ADDRESS_REGEX} movl .* a.out`main \+ 11 at main.cpp:4 @@ -269,7 +269,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 32 at main.cpp:4 10: {ADDRESS_REGEX} jle .* ; <\+20> at main.cpp:5 8: {ADDRESS_REGEX} cmpl .* @@ -297,7 +297,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions -f", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 20 at main.cpp:5 2: {ADDRESS_REGEX} xorl""" ], @@ -306,7 +306,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 20 at main.cpp:5 2: {ADDRESS_REGEX} xorl""" ], @@ -336,7 +336,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions -c 1", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 11 at main.cpp:4""" ], ) diff --git a/lldb/test/API/commands/trace/TestTraceTSC.py b/lldb/test/API/commands/trace/TestTraceTSC.py index 580cb347dc30a..4a19065e60c2b 100644 --- a/lldb/test/API/commands/trace/TestTraceTSC.py +++ b/lldb/test/API/commands/trace/TestTraceTSC.py @@ -20,7 +20,7 @@ def testTscPerThread(self): 
self.expect("n") self.expect( "thread trace dump instructions -t -c 1", - patterns=[": \[\d+.\d+ ns\] 0x0000000000400511 movl"], + patterns=[r": \[\d+.\d+ ns\] 0x0000000000400511 movl"], ) @testSBAPIAndCommands @@ -43,7 +43,7 @@ def testMultipleTscsPerThread(self): self.runCmd("thread trace dump instructions -t --raw --forward") id_to_timestamp = {} for line in self.res.GetOutput().splitlines(): - m = re.search(" (.+): \[(.+)\ ns].*", line) + m = re.search(r" (.+): \[(.+)\ ns].*", line) if m: id_to_timestamp[int(m.group(1))] = m.group(2) self.assertEqual(len(id_to_timestamp), 3) @@ -69,12 +69,12 @@ def testTscPerProcess(self): self.expect("n") self.expect( "thread trace dump instructions -t -c 1", - patterns=[": \[\d+.\d+ ns\] 0x0000000000400511 movl"], + patterns=[r": \[\d+.\d+ ns\] 0x0000000000400511 movl"], ) self.expect( "thread trace dump instructions -t -c 1 --pretty-json", - patterns=['''"timestamp_ns": "\d+.\d+"'''], + patterns=[r'''"timestamp_ns": "\d+.\d+"'''], ) @testSBAPIAndCommands @@ -91,7 +91,7 @@ def testDumpingAfterTracingWithoutTsc(self): self.expect("n") self.expect( "thread trace dump instructions -t -c 1", - patterns=[": \[unavailable\] 0x0000000000400511 movl"], + patterns=[r": \[unavailable\] 0x0000000000400511 movl"], ) self.expect( diff --git a/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py b/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py index 5cfcf5d69fd2a..2412b295bfb59 100644 --- a/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py +++ b/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py @@ -28,7 +28,7 @@ def test_run_quit(self): # Launch the process without a TTY so we don't have to interrupt: child.sendline("process launch -n") print("launched process") - child.expect("Process ([\d]*) launched:") + child.expect(r"Process ([\d]*) launched:") print("Got launch message") child.sendline("quit") print("sent quit") diff --git 
a/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py b/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py index fe99adf425513..5798c8ffa8220 100644 --- a/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py @@ -60,7 +60,7 @@ def testBreakpointByLineAndColumnNearestCode(self): for pattern in patterns: line = line_number("main.cpp", pattern) + 1 - column = int(re.search("\(col:([0-9]+)\)", pattern).group(1)) + column = int(re.search(r"\(col:([0-9]+)\)", pattern).group(1)) source_loc.append({"line": line, "column": column}) target = self.createTestTarget() diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py b/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py index d87e6275f7b51..e0c93e3c18581 100644 --- a/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py @@ -53,7 +53,7 @@ def set_breakpoint(self): ], patterns=[ "where = a.out`func_inlined .+unresolved, hit count = 0", - "where = a.out`main .+\[inlined\].+unresolved, hit count = 0", + r"where = a.out`main .+\[inlined\].+unresolved, hit count = 0", ], ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-advanced/TestDataFormatterAdv.py b/lldb/test/API/functionalities/data-formatter/data-formatter-advanced/TestDataFormatterAdv.py index b740689e67538..ce4eb0a060cee 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-advanced/TestDataFormatterAdv.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-advanced/TestDataFormatterAdv.py @@ -104,7 +104,9 @@ def cleanup(): self.runCmd("type 
summary clear") - self.runCmd('type summary add --summary-string "${var[0-1]}" -x "int\[[0-9]\]"') + self.runCmd( + r'type summary add --summary-string "${var[0-1]}" -x "int\[[0-9]\]"' + ) self.expect("frame variable int_array", substrs=["1,2"]) @@ -119,7 +121,7 @@ def cleanup(): self.runCmd("type summary clear") - self.runCmd('type summary add -c -x "i_am_cool\[[0-9]\]"') + self.runCmd(r'type summary add -c -x "i_am_cool\[[0-9]\]"') self.runCmd("type summary add -c i_am_cool") self.expect( @@ -172,7 +174,7 @@ def cleanup(): self.runCmd("type summary clear") self.runCmd( - 'type summary add --summary-string "${*var[].x[0-3]%hex} is a bitfield on a set of integers" -x "SimpleWithPointers\[[0-9]\]"' + r'type summary add --summary-string "${*var[].x[0-3]%hex} is a bitfield on a set of integers" -x "SimpleWithPointers\[[0-9]\]"' ) self.expect( diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py b/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py index 644529b1c451b..a848c6257510e 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py @@ -62,7 +62,7 @@ def cleanup(): self.expect( "frame variable", patterns=[ - "\(Speed\) SPILookHex = 0x[0-9a-f]+" # Speed should look hex-ish now. + r"\(Speed\) SPILookHex = 0x[0-9a-f]+" # Speed should look hex-ish now. ], ) @@ -71,14 +71,14 @@ def cleanup(): self.expect( "frame variable", patterns=[ - "\(SignalMask\) SMILookHex = 0x[0-9a-f]+" # SignalMask should look hex-ish now. + r"\(SignalMask\) SMILookHex = 0x[0-9a-f]+" # SignalMask should look hex-ish now. ], ) self.expect( "frame variable", matching=False, patterns=[ - "\(Type4\) T4ILookChar = 0x[0-9a-f]+" # Type4 should NOT look hex-ish now. + r"\(Type4\) T4ILookChar = 0x[0-9a-f]+" # Type4 should NOT look hex-ish now. 
], ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py index 4046dc79538a5..c90a5c61d9c0b 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py @@ -53,7 +53,7 @@ def nscontainers_data_formatter_commands(self): self.expect( "frame variable -d run-target *nscfDictionary", patterns=[ - "\(__NSCFDictionary\) \*nscfDictionary =", + r"\(__NSCFDictionary\) \*nscfDictionary =", 'key = 0x.* @"foo"', 'value = 0x.* @"foo"', 'key = 0x.* @"bar"', @@ -68,7 +68,7 @@ def nscontainers_data_formatter_commands(self): self.expect( "frame variable -d run-target *cfDictionaryRef", patterns=[ - "\(const __CFDictionary\) \*cfDictionaryRef =", + r"\(const __CFDictionary\) \*cfDictionaryRef =", 'key = 0x.* @"foo"', 'value = 0x.* @"foo"', 'key = 0x.* @"bar"', @@ -89,18 +89,18 @@ def nscontainers_data_formatter_commands(self): self.expect( "frame variable -d run-target *nscfSet", patterns=[ - "\(__NSCFSet\) \*nscfSet =", - '\[0\] = 0x.* @".*"', - '\[1\] = 0x.* @".*"', + r"\(__NSCFSet\) \*nscfSet =", + r'\[0\] = 0x.* @".*"', + r'\[1\] = 0x.* @".*"', ], ) self.expect( "frame variable -d run-target *cfSetRef", patterns=[ - "\(const __CFSet\) \*cfSetRef =", - '\[0\] = 0x.* @".*"', - '\[1\] = 0x.* @".*"', + r"\(const __CFSet\) \*cfSetRef =", + r'\[0\] = 0x.* @".*"', + r'\[1\] = 0x.* @".*"', ], ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered/TestDataFormatterGenericUnordered.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered/TestDataFormatterGenericUnordered.py index c3043b489d951..50dfbbf6b90a5 100644 --- 
a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered/TestDataFormatterGenericUnordered.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered/TestDataFormatterGenericUnordered.py @@ -83,9 +83,9 @@ def cleanup(): [ "IntsUnorderedSet", "size=5 {", - "\[\d\] = 5", - "\[\d\] = 3", - "\[\d\] = 2", + r"\[\d\] = 5", + r"\[\d\] = 3", + r"\[\d\] = 2", ], ) @@ -94,9 +94,9 @@ def cleanup(): [ "StringsUnorderedSet", "size=5 {", - '\[\d\] = "is"', - '\[\d\] = "world"', - '\[\d\] = "hello"', + r'\[\d\] = "is"', + r'\[\d\] = "world"', + r'\[\d\] = "hello"', ], ) @@ -105,9 +105,9 @@ def cleanup(): [ "IntsUnorderedMultiSet", "size=6 {", - "(\[\d\] = 3(\\n|.)+){3}", - "\[\d\] = 2", - "\[\d\] = 1", + "(\\[\\d\\] = 3(\\n|.)+){3}", + r"\[\d\] = 2", + r"\[\d\] = 1", ], ) @@ -116,8 +116,8 @@ def cleanup(): [ "StringsUnorderedMultiSet", "size=5 {", - '(\[\d\] = "is"(\\n|.)+){2}', - '(\[\d\] = "world"(\\n|.)+){2}', + '(\\[\\d\\] = "is"(\\n|.)+){2}', + '(\\[\\d\\] = "world"(\\n|.)+){2}', ], ) diff --git a/lldb/test/API/functionalities/data-formatter/type_summary_list_arg/TestTypeSummaryListArg.py b/lldb/test/API/functionalities/data-formatter/type_summary_list_arg/TestTypeSummaryListArg.py index 0bc34910df7d2..6b883a5bb6c8f 100644 --- a/lldb/test/API/functionalities/data-formatter/type_summary_list_arg/TestTypeSummaryListArg.py +++ b/lldb/test/API/functionalities/data-formatter/type_summary_list_arg/TestTypeSummaryListArg.py @@ -17,7 +17,7 @@ def test_type_summary_list_with_arg(self): "type summary list Foo", substrs=["Category: default", "Category: system"] ) self.expect( - "type summary list char", substrs=["char ?(\*|\[\])", "char ?\[[0-9]+\]"] + "type summary list char", substrs=[r"char ?(\*|\[\])", r"char ?\[[0-9]+\]"] ) self.expect("type summary list -w default", substrs=["system"], matching=False) @@ -28,6 +28,6 @@ def test_type_summary_list_with_arg(self): ) self.expect( "type summary list -w system char", - 
substrs=["char ?(\*|\[\])", "char ?\[[0-9]+\]"], + substrs=[r"char ?(\*|\[\])", r"char ?\[[0-9]+\]"], matching=True, ) diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py b/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py index 2dbb2b5f5e3a9..d7849500c378d 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py @@ -678,7 +678,7 @@ def test_enum_type_not_found(self): """ ) - self.expect("register read cpsr", patterns=["\(E = 1\)$"]) + self.expect("register read cpsr", patterns=[r"\(E = 1\)$"]) @skipIfXmlSupportMissing @skipIfRemote @@ -701,7 +701,7 @@ def test_enum_duplicated_evalue(self): ) self.expect("register info cpsr", patterns=["E: 1 = def, 2 = geh$"]) - self.expect("register read cpsr", patterns=["\(E = def \| geh\)$"]) + self.expect("register read cpsr", patterns=[r"\(E = def \| geh\)$"]) @skipIfXmlSupportMissing @skipIfRemote @@ -725,7 +725,7 @@ def test_enum_duplicated(self): ) self.expect("register info cpsr", patterns=["E: 1 = def$"]) - self.expect("register read cpsr", patterns=["\(E = def\)$"]) + self.expect("register read cpsr", patterns=[r"\(E = def\)$"]) @skipIfXmlSupportMissing @skipIfRemote @@ -1014,7 +1014,7 @@ def test_many_fields_same_enum(self): self.expect("register info cpsr", patterns=expected_info) - expected_read = ["\(f2 = valid, f1 = valid\)$"] + expected_read = [r"\(f2 = valid, f1 = valid\)$"] self.expect("register read x0", patterns=expected_read) self.expect("register read cpsr", patterns=expected_read) @@ -1055,4 +1055,4 @@ def test_fields_same_name_different_enum(self): ], ) - self.expect("register read x0", patterns=["\(foo = foo_1, foo = foo_0\)$"]) + self.expect("register read x0", patterns=[r"\(foo = foo_1, foo = foo_0\)$"]) diff --git a/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py b/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py 
index 577411ebc1037..50182e72e498c 100644 --- a/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py +++ b/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py @@ -95,7 +95,7 @@ def test_command(self): self.assertFalse(result.Succeeded()) self.assertRegex( result.GetError(), - "Usage: memory region \(or \-\-all\)", + r"Usage: memory region \(or \-\-all\)", ) # --all should match what repeating the command gives you diff --git a/lldb/test/API/functionalities/target_var/TestTargetVar.py b/lldb/test/API/functionalities/target_var/TestTargetVar.py index 0ef3d008e8f19..2d108df3e22e5 100644 --- a/lldb/test/API/functionalities/target_var/TestTargetVar.py +++ b/lldb/test/API/functionalities/target_var/TestTargetVar.py @@ -27,7 +27,7 @@ def testTargetVarExpr(self): ) self.expect("target variable i", substrs=["i", "42"]) self.expect( - "target variable var", patterns=["\(incomplete \*\) var = 0[xX](0)*dead"] + "target variable var", patterns=[r"\(incomplete \*\) var = 0[xX](0)*dead"] ) self.expect( "target variable var[0]", diff --git a/lldb/test/API/iohandler/completion/TestIOHandlerCompletion.py b/lldb/test/API/iohandler/completion/TestIOHandlerCompletion.py index b16869b05e7df..0c788b2cdfee3 100644 --- a/lldb/test/API/iohandler/completion/TestIOHandlerCompletion.py +++ b/lldb/test/API/iohandler/completion/TestIOHandlerCompletion.py @@ -55,7 +55,7 @@ def test_completion(self): self.child.expect( re.compile( b"TestIOHandler(\r" - + self.cursor_forward_escape_seq("\d+") + + self.cursor_forward_escape_seq(r"\d+") + b")?Completion.py" ) ) diff --git a/lldb/test/API/lang/c/enum_types/TestEnumTypes.py b/lldb/test/API/lang/c/enum_types/TestEnumTypes.py index 0015c8f478578..d4bbe9bcfac81 100644 --- a/lldb/test/API/lang/c/enum_types/TestEnumTypes.py +++ b/lldb/test/API/lang/c/enum_types/TestEnumTypes.py @@ -27,7 +27,7 @@ def test_command_line(self): self.expect("fr var c", DATA_TYPES_DISPLAYED_CORRECTLY, patterns=[" = C$"]) self.expect("fr var ab", 
DATA_TYPES_DISPLAYED_CORRECTLY, patterns=[" = AB$"]) self.expect( - "fr var ac", DATA_TYPES_DISPLAYED_CORRECTLY, patterns=[" = A \| C$"] + "fr var ac", DATA_TYPES_DISPLAYED_CORRECTLY, patterns=[r" = A \| C$"] ) self.expect("fr var all", DATA_TYPES_DISPLAYED_CORRECTLY, patterns=[" = ALL$"]) # Test that an enum that doesn't match the heuristic we use in @@ -39,7 +39,7 @@ def test_command_line(self): self.expect( "expression (enum bitfield)nonsense", DATA_TYPES_DISPLAYED_CORRECTLY, - patterns=[" = B \| C \| 0x10$"], + patterns=[r" = B \| C \| 0x10$"], ) # Break inside the main. diff --git a/lldb/test/API/lang/c/function_types/TestFunctionTypes.py b/lldb/test/API/lang/c/function_types/TestFunctionTypes.py index 2f6bb7fc0bb16..6e42cd63573c9 100644 --- a/lldb/test/API/lang/c/function_types/TestFunctionTypes.py +++ b/lldb/test/API/lang/c/function_types/TestFunctionTypes.py @@ -54,7 +54,7 @@ def test_pointers(self): ) if self.platformIsDarwin(): - regexps = ["lib.*\.dylib`printf"] + regexps = [r"lib.*\.dylib`printf"] else: regexps = ["printf"] self.expect( diff --git a/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py b/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py index 0e51d4f7ff468..bcdd590e00d0a 100644 --- a/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py +++ b/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py @@ -9,7 +9,7 @@ def re_expr_equals(val_type, val): # Match ({val_type}) ${sum_digits} = {val} - return re.compile(r"\(" + val_type + "\) \$\d+ = " + str(val)) + return re.compile(r"\(" + val_type + r"\) \$\d+ = " + str(val)) class RegisterVariableTestCase(TestBase): diff --git a/lldb/test/API/lang/c/set_values/TestSetValues.py b/lldb/test/API/lang/c/set_values/TestSetValues.py index 0d697d6719d5f..e0813734403a0 100644 --- a/lldb/test/API/lang/c/set_values/TestSetValues.py +++ b/lldb/test/API/lang/c/set_values/TestSetValues.py @@ -82,7 +82,7 @@ def test(self): self.expect( "frame variable 
--show-types", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\((short unsigned int|unsigned short)\) i = 33"], + patterns=[r"\((short unsigned int|unsigned short)\) i = 33"], ) # Now set variable 'i' and check that it is correctly displayed. @@ -90,7 +90,7 @@ def test(self): self.expect( "frame variable --show-types", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\((short unsigned int|unsigned short)\) i = 333"], + patterns=[r"\((short unsigned int|unsigned short)\) i = 333"], ) self.runCmd("continue") diff --git a/lldb/test/API/lang/c/strings/TestCStrings.py b/lldb/test/API/lang/c/strings/TestCStrings.py index 159f8a4e4b664..f11006f7feefe 100644 --- a/lldb/test/API/lang/c/strings/TestCStrings.py +++ b/lldb/test/API/lang/c/strings/TestCStrings.py @@ -20,7 +20,7 @@ def test_with_run_command(self): self.runCmd("process launch", RUN_SUCCEEDED) - self.expect("expression -- a[2]", patterns=["\((const )?char\) \$0 = 'c'"]) + self.expect("expression -- a[2]", patterns=[r"\((const )?char\) \$0 = 'c'"]) self.expect("expression -- z[2]", startstr="(const char) $1 = 'x'") diff --git a/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py b/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py index 2bffd2eea123a..56bb25b3f3c3d 100644 --- a/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py +++ b/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py @@ -71,12 +71,12 @@ def test(self): self.expect( "expr var_static", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(int\) \$.* = 88"], + patterns=[r"\(int\) \$.* = 88"], ) self.expect( "expr var_shared", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(int\) \$.* = 66"], + patterns=[r"\(int\) \$.* = 66"], ) # Continue on the main thread @@ -102,10 +102,10 @@ def test(self): self.expect( "expr var_static", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(int\) \$.* = 44"], + patterns=[r"\(int\) \$.* = 44"], ) self.expect( "expr var_shared", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(int\) \$.* = 33"], + patterns=[r"\(int\) \$.* = 33"], ) diff --git 
a/lldb/test/API/lang/cpp/char1632_t/TestChar1632T.py b/lldb/test/API/lang/cpp/char1632_t/TestChar1632T.py index e1cbe0509ae04..e2521d88258bc 100644 --- a/lldb/test/API/lang/cpp/char1632_t/TestChar1632T.py +++ b/lldb/test/API/lang/cpp/char1632_t/TestChar1632T.py @@ -74,8 +74,8 @@ def test(self): self.expect( "frame variable as16 as32", patterns=[ - "\(char16_t\[[0-9]+\]\) as16 = ", - "\(char32_t\[[0-9]+\]\) as32 = ", + r"\(char16_t\[[0-9]+\]\) as16 = ", + r"\(char32_t\[[0-9]+\]\) as32 = ", ], substrs=['u"ﺸﺵۻ"', 'U"ЕЙРГЖО"'], ) @@ -103,8 +103,8 @@ def test(self): self.expect( "frame variable as16 as32", patterns=[ - "\(char16_t\[[0-9]+\]\) as16 = ", - "\(char32_t\[[0-9]+\]\) as32 = ", + r"\(char16_t\[[0-9]+\]\) as16 = ", + r"\(char32_t\[[0-9]+\]\) as32 = ", ], substrs=['"色ハ匂ヘト散リヌルヲ"', '"෴"'], ) diff --git a/lldb/test/API/lang/cpp/class_static/TestStaticVariables.py b/lldb/test/API/lang/cpp/class_static/TestStaticVariables.py index 04678ec018bdf..41181e468c308 100644 --- a/lldb/test/API/lang/cpp/class_static/TestStaticVariables.py +++ b/lldb/test/API/lang/cpp/class_static/TestStaticVariables.py @@ -38,7 +38,7 @@ def test_with_run_command(self): self.expect( "target variable A::g_points", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(PointType\[[1-9]*\]\) A::g_points = {"], + patterns=[r"\(PointType\[[1-9]*\]\) A::g_points = {"], ) self.expect( "target variable g_points", @@ -76,7 +76,7 @@ def test_with_run_command_complete(self): "target variable A::g_points", VARIABLES_DISPLAYED_CORRECTLY, patterns=[ - "\(PointType\[[1-9]*\]\) A::g_points = {", + r"\(PointType\[[1-9]*\]\) A::g_points = {", "(x = 1, y = 2)", "(x = 11, y = 22)", ], diff --git a/lldb/test/API/lang/cpp/class_types/TestClassTypes.py b/lldb/test/API/lang/cpp/class_types/TestClassTypes.py index 80781f5d90912..b5d5dd2370781 100644 --- a/lldb/test/API/lang/cpp/class_types/TestClassTypes.py +++ b/lldb/test/API/lang/cpp/class_types/TestClassTypes.py @@ -179,7 +179,7 @@ def test_with_expr_parser(self): 
self.expect( "expression this->m_c_int", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(int\) \$[0-9]+ = 66"], + patterns=[r"\(int\) \$[0-9]+ = 66"], ) def test_with_constructor_name(self): diff --git a/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py b/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py index e016168f047c1..32ef009279713 100644 --- a/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py +++ b/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py @@ -129,7 +129,7 @@ def test_get_dynamic_vals(self): self.expect( "frame var -d run-target --ptr-depth=2 --show-types anotherA.m_client_A", "frame var finds its way into a child member", - patterns=["\(B \*\)"], + patterns=[r"\(B \*\)"], ) # Now make sure we also get it right for a reference as well: diff --git a/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py b/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py index 5cc43f3cd9910..8efa53bdbf722 100644 --- a/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py +++ b/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py @@ -23,7 +23,7 @@ def test_frame_recognizer(self): # We never hide the frame of the entry-point into the standard library, even # if the name starts with `__` which usually indicates an internal function. 
"ranges_sort_less(int, int)": [ - re.compile("ranges::__sort::(__fn::)?operator\(\)"), + re.compile(r"ranges::__sort::(__fn::)?operator\(\)"), "test_algorithms", ], # `ranges::views::transform` internally uses `std::invoke`, and that diff --git a/lldb/test/API/lang/cpp/namespace/TestNamespace.py b/lldb/test/API/lang/cpp/namespace/TestNamespace.py index 40cbff9cb3c94..d790002dea072 100644 --- a/lldb/test/API/lang/cpp/namespace/TestNamespace.py +++ b/lldb/test/API/lang/cpp/namespace/TestNamespace.py @@ -237,12 +237,12 @@ def test_with_run_command(self): self.expect( "expression myanonfunc", - patterns=["\(anonymous namespace\)::myanonfunc\(int\)"], + patterns=[r"\(anonymous namespace\)::myanonfunc\(int\)"], ) self.expect( "expression variadic_sum", - patterns=["\(anonymous namespace\)::variadic_sum\(int, ...\)"], + patterns=[r"\(anonymous namespace\)::variadic_sum\(int, ...\)"], ) self.expect_expr("::B::Bar b; b.x()", result_type="int", result_value="42") diff --git a/lldb/test/API/lang/cpp/signed_types/TestSignedTypes.py b/lldb/test/API/lang/cpp/signed_types/TestSignedTypes.py index a08c2e721328d..b8c2c23613868 100644 --- a/lldb/test/API/lang/cpp/signed_types/TestSignedTypes.py +++ b/lldb/test/API/lang/cpp/signed_types/TestSignedTypes.py @@ -57,8 +57,8 @@ def test(self): "frame variable --show-types --no-args", VARIABLES_DISPLAYED_CORRECTLY, patterns=[ - "\((short int|short)\) the_signed_short = 99", - "\((signed char|char)\) the_signed_char = 'c'", + r"\((short int|short)\) the_signed_short = 99", + r"\((signed char|char)\) the_signed_char = 'c'", ], substrs=[ "(int) the_signed_int = 99", diff --git a/lldb/test/API/lang/cpp/unsigned_types/TestUnsignedTypes.py b/lldb/test/API/lang/cpp/unsigned_types/TestUnsignedTypes.py index ffce9534e05bd..4796c84966675 100644 --- a/lldb/test/API/lang/cpp/unsigned_types/TestUnsignedTypes.py +++ b/lldb/test/API/lang/cpp/unsigned_types/TestUnsignedTypes.py @@ -22,7 +22,7 @@ def test(self): "frame variable --show-types --no-args", 
VARIABLES_DISPLAYED_CORRECTLY, patterns=[ - "\((short unsigned int|unsigned short)\) the_unsigned_short = 99" + r"\((short unsigned int|unsigned short)\) the_unsigned_short = 99" ], substrs=[ "(unsigned char) the_unsigned_char = 'c'", diff --git a/lldb/test/API/lang/mixed/TestMixedLanguages.py b/lldb/test/API/lang/mixed/TestMixedLanguages.py index 1637d59a5edcb..d7ab89e89ab7f 100644 --- a/lldb/test/API/lang/mixed/TestMixedLanguages.py +++ b/lldb/test/API/lang/mixed/TestMixedLanguages.py @@ -22,12 +22,12 @@ def cleanup(): self.addTearDownHook(cleanup) self.runCmd("settings show frame-format") - m = re.match('^frame-format \(format-string\) = "(.*)"$', self.res.GetOutput()) + m = re.match(r'^frame-format \(format-string\) = "(.*)"$', self.res.GetOutput()) self.assertTrue(m, "Bad settings string") self.format_string = m.group(1) # Change the default format to print the language. - format_string = "frame #${frame.index}: ${frame.pc}{ ${module.file.basename}\`${function.name}{${function.pc-offset}}}{, lang=${language}}\n" + format_string = "frame #${frame.index}: ${frame.pc}{ ${module.file.basename}\\`${function.name}{${function.pc-offset}}}{, lang=${language}}\n" self.runCmd("settings set frame-format %s" % format_string) self.expect( "settings show frame-format", diff --git a/lldb/test/API/lang/objc/foundation/TestObjCMethods.py b/lldb/test/API/lang/objc/foundation/TestObjCMethods.py index 634e4730c764b..5fa3f280d33bf 100644 --- a/lldb/test/API/lang/objc/foundation/TestObjCMethods.py +++ b/lldb/test/API/lang/objc/foundation/TestObjCMethods.py @@ -166,7 +166,7 @@ def test_data_type_and_expr(self): "frame variable --show-types --scope", VARIABLES_DISPLAYED_CORRECTLY, substrs=["ARG: (MyString *) self"], - patterns=["ARG: \(.*\) _cmd", "(objc_selector *)|(SEL)"], + patterns=[r"ARG: \(.*\) _cmd", "(objc_selector *)|(SEL)"], ) # rdar://problem/8651752 diff --git a/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSArray.py 
b/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSArray.py index 81c409d08364c..2cd1386289b9e 100644 --- a/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSArray.py +++ b/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSArray.py @@ -19,10 +19,10 @@ def test_NSArray_expr_commands(self): self.runCmd("thread backtrace") self.expect( - "expression (int)[nil_mutable_array count]", patterns=["\(int\) \$.* = 0"] + "expression (int)[nil_mutable_array count]", patterns=[r"\(int\) \$.* = 0"] ) - self.expect("expression (int)[array1 count]", patterns=["\(int\) \$.* = 3"]) - self.expect("expression (int)[array2 count]", patterns=["\(int\) \$.* = 3"]) - self.expect("expression (int)array1.count", patterns=["\(int\) \$.* = 3"]) - self.expect("expression (int)array2.count", patterns=["\(int\) \$.* = 3"]) + self.expect("expression (int)[array1 count]", patterns=[r"\(int\) \$.* = 3"]) + self.expect("expression (int)[array2 count]", patterns=[r"\(int\) \$.* = 3"]) + self.expect("expression (int)array1.count", patterns=[r"\(int\) \$.* = 3"]) + self.expect("expression (int)array2.count", patterns=[r"\(int\) \$.* = 3"]) self.runCmd("process continue") diff --git a/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py b/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py index c9e801422b46c..a14035db5e057 100644 --- a/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py +++ b/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py @@ -20,7 +20,7 @@ def test_runtime_types(self): # Test_NSString: self.runCmd("thread backtrace") - self.expect("expression [str length]", patterns=["\(NSUInteger\) \$.* ="]) + self.expect("expression [str length]", patterns=[r"\(NSUInteger\) \$.* ="]) self.expect("expression str.length") self.expect('expression str = [NSString stringWithCString: "new"]') self.expect( diff --git a/lldb/test/API/lang/objc/foundation/TestObjCMethodsString.py b/lldb/test/API/lang/objc/foundation/TestObjCMethodsString.py index 
4d8c9c960ea59..3e33077b8e2d7 100644 --- a/lldb/test/API/lang/objc/foundation/TestObjCMethodsString.py +++ b/lldb/test/API/lang/objc/foundation/TestObjCMethodsString.py @@ -21,11 +21,11 @@ def test_NSString_expr_commands(self): # Test_NSString: self.runCmd("thread backtrace") - self.expect("expression (int)[str length]", patterns=["\(int\) \$.* ="]) - self.expect("expression (int)[str_id length]", patterns=["\(int\) \$.* ="]) - self.expect("expression (id)[str description]", patterns=["\(id\) \$.* = 0x"]) + self.expect("expression (int)[str length]", patterns=[r"\(int\) \$.* ="]) + self.expect("expression (int)[str_id length]", patterns=[r"\(int\) \$.* ="]) + self.expect("expression (id)[str description]", patterns=[r"\(id\) \$.* = 0x"]) self.expect( - "expression (id)[str_id description]", patterns=["\(id\) \$.* = 0x"] + "expression (id)[str_id description]", patterns=[r"\(id\) \$.* = 0x"] ) self.expect("expression str.length") self.expect('expression str = @"new"') @@ -42,6 +42,6 @@ def test_MyString_dump_with_runtime(self): ) self.expect( "expression --show-types -- *my", - patterns=["\(MyString\) \$.* = ", "\(MyBase\)"], + patterns=[r"\(MyString\) \$.* = ", r"\(MyBase\)"], ) self.runCmd("process continue") diff --git a/lldb/test/API/lang/objc/objc-dynamic-value/TestObjCDynamicValue.py b/lldb/test/API/lang/objc/objc-dynamic-value/TestObjCDynamicValue.py index 3ba68f4c35a5c..4d439fa0046e4 100644 --- a/lldb/test/API/lang/objc/objc-dynamic-value/TestObjCDynamicValue.py +++ b/lldb/test/API/lang/objc/objc-dynamic-value/TestObjCDynamicValue.py @@ -107,7 +107,7 @@ def test_get_objc_dynamic_vals(self): self.expect( "frame var -d run-target myObserver->_source", "frame var finds its way into a child member", - patterns=["\(SourceDerived \*\)"], + patterns=[r"\(SourceDerived \*\)"], ) # check that our ObjC GetISA() does a good job at hiding KVO swizzled diff --git a/lldb/test/API/lang/objcxx/objc-builtin-types/TestObjCBuiltinTypes.py 
b/lldb/test/API/lang/objcxx/objc-builtin-types/TestObjCBuiltinTypes.py index 3cdca31b8969b..ac107de7e00a7 100644 --- a/lldb/test/API/lang/objcxx/objc-builtin-types/TestObjCBuiltinTypes.py +++ b/lldb/test/API/lang/objcxx/objc-builtin-types/TestObjCBuiltinTypes.py @@ -51,11 +51,11 @@ def test_with_python_api(self): frame = thread_list[0].GetFrameAtIndex(0) self.assertTrue(frame, "Got a valid frame 0 frame.") - self.expect("expr (foo)", patterns=["\(ns::id\) \$.* = 0"]) + self.expect("expr (foo)", patterns=[r"\(ns::id\) \$.* = 0"]) self.expect( "expr --language Objective-C++ -- id my_id = 0; my_id", - patterns=["\(id\) \$.* = nil"], + patterns=[r"\(id\) \$.* = nil"], ) self.expect("expr --language C++ -- id my_id = 0; my_id", error=True) diff --git a/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py b/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py index 6309648819026..a9879f67d8b8f 100644 --- a/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py +++ b/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py @@ -88,9 +88,9 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr}+16 {addr}".format(addr=self.MTE_BUF_ADDR), error=True, patterns=[ - "error: End address \(0x[A-Fa-f0-9]+\) " + r"error: End address \(0x[A-Fa-f0-9]+\) " "must be greater than the start address " - "\(0x[A-Fa-f0-9]+\)" + r"\(0x[A-Fa-f0-9]+\)" ], ) @@ -100,8 +100,8 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr} {addr}+32".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)$" + "\\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\\): 0x0\n" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)$" ], ) @@ -110,7 +110,7 @@ def test_mte_tag_core_file_tag_read(self): self.expect( "memory tag read {addr} 
{addr}+16".format(addr=self.MTE_BUF_ADDR), patterns=[ - "Allocation tags:\n" "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0$" + "Allocation tags:\n" r"\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0$" ], ) # Get the other half of the first byte. @@ -119,7 +119,7 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr}+16 {addr}+32".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)$" + r"\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)$" ], ) @@ -128,18 +128,18 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr} {addr}+48".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\): 0x2 \(mismatch\)$" + "\\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\\): 0x0\n" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\\): 0x2 \\(mismatch\\)$" ], ) self.expect( "memory tag read {addr}+16 {addr}+64".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\): 0x2 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+30, 0x[A-Fa-f0-9]+40\): 0x3 \(mismatch\)$" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\\): 0x2 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+30, 0x[A-Fa-f0-9]+40\\): 0x3 \\(mismatch\\)$" ], ) # Here both start and end are unaligned. 
@@ -147,10 +147,10 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr}+16 {addr}+80".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\): 0x2 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+30, 0x[A-Fa-f0-9]+40\): 0x3 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+40, 0x[A-Fa-f0-9]+50\): 0x4 \(mismatch\)$" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\\): 0x2 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+30, 0x[A-Fa-f0-9]+40\\): 0x3 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+40, 0x[A-Fa-f0-9]+50\\): 0x4 \\(mismatch\\)$" ], ) @@ -159,7 +159,7 @@ def test_mte_tag_core_file_tag_read(self): self.expect( "memory tag read {addr} {addr}+1".format(addr=self.MTE_BUF_ADDR), patterns=[ - "Allocation tags:\n" "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0$" + "Allocation tags:\n" r"\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0$" ], ) @@ -169,8 +169,8 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr} {addr}+17".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)$" + "\\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\\): 0x0\n" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)$" ], ) @@ -179,9 +179,9 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr} {addr}+33".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)\n", - "\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\): 0x2 \(mismatch\)$", + "\\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\\): 0x0\n" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)\n", + "\\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\\): 0x2 \\(mismatch\\)$", ], ) diff --git 
a/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py b/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py index c72eb72b05cce..8a76d6c6c40c3 100644 --- a/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py +++ b/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py @@ -86,8 +86,8 @@ def test_mte_tag_read(self): self.expect( "memory tag read mte_buf mte_buf-16", patterns=[ - "error: End address \(0x[A-Fa-f0-9]+\) must be " - "greater than the start address \(0x[A-Fa-f0-9]+\)" + r"error: End address \(0x[A-Fa-f0-9]+\) must be " + r"greater than the start address \(0x[A-Fa-f0-9]+\)" ], error=True, ) @@ -95,8 +95,8 @@ def test_mte_tag_read(self): self.expect( "memory tag read mte_buf mte_buf", patterns=[ - "error: End address \(0x[A-Fa-f0-9]+\) must be " - "greater than the start address \(0x[A-Fa-f0-9]+\)" + r"error: End address \(0x[A-Fa-f0-9]+\) must be " + r"greater than the start address \(0x[A-Fa-f0-9]+\)" ], error=True, ) @@ -117,7 +117,7 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" + r"\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" ], ) @@ -127,7 +127,7 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" + r"\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" ], ) @@ -137,8 +137,8 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x1 \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x0 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0x1 \\(mismatch\\)$" ], ) @@ -150,7 +150,7 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - 
"\[0x[0-9A-Fa-f]+, 0x[0-9A-Fa-f]+\): 0x0$" + r"\[0x[0-9A-Fa-f]+, 0x[0-9A-Fa-f]+\): 0x0$" ], ) @@ -179,8 +179,8 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+f0, 0x[0-9A-Fa-f]+00\): 0xf \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+f0, 0x[0-9A-Fa-f]+00\\): 0xf \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x0 \\(mismatch\\)$" ], ) @@ -192,7 +192,7 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0xa\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" + r"\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" ], ) @@ -202,9 +202,9 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+80, 0x[0-9A-Fa-f]+90\): 0x8 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+90, 0x[0-9A-Fa-f]+a0\): 0x9\n" - "\[0x[0-9A-Fa-f]+a0, 0x[0-9A-Fa-f]+b0\): 0xa \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+80, 0x[0-9A-Fa-f]+90\\): 0x8 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+90, 0x[0-9A-Fa-f]+a0\\): 0x9\n" + "\\[0x[0-9A-Fa-f]+a0, 0x[0-9A-Fa-f]+b0\\): 0xa \\(mismatch\\)$" ], ) @@ -258,8 +258,8 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x9\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x1 \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x9\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0x1 \\(mismatch\\)$" ], ) @@ -270,9 +270,9 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0xa \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0xb \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\): 0xc \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0xa \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0xb \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\\): 0xc \\(mismatch\\)$" ], ) @@ 
-284,7 +284,7 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+, 0x[0-9A-Fa-f]+\): 0xe \(mismatch\)$" + r"\[0x[0-9A-Fa-f]+, 0x[0-9A-Fa-f]+\): 0xe \(mismatch\)$" ], ) @@ -323,8 +323,8 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+f0, 0x[0-9A-Fa-f]+00\): 0x1 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x2 \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+f0, 0x[0-9A-Fa-f]+00\\): 0x1 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x2 \\(mismatch\\)$" ], ) @@ -335,7 +335,7 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x1 \(mismatch\)$" + r"\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x1 \(mismatch\)$" ], ) @@ -361,16 +361,16 @@ def test_mte_tag_write(self): self.expect( "memory tag write mte_buf_2 9 --end-addr mte_buf_2", patterns=[ - "error: End address \(0x[A-Fa-f0-9]+\) must be " - "greater than the start address \(0x[A-Fa-f0-9]+\)" + r"error: End address \(0x[A-Fa-f0-9]+\) must be " + r"greater than the start address \(0x[A-Fa-f0-9]+\)" ], error=True, ) self.expect( "memory tag write mte_buf_2 9 --end-addr mte_buf_2-16", patterns=[ - "error: End address \(0x[A-Fa-f0-9]+\) must be " - "greater than the start address \(0x[A-Fa-f0-9]+\)" + r"error: End address \(0x[A-Fa-f0-9]+\) must be " + r"greater than the start address \(0x[A-Fa-f0-9]+\)" ], error=True, ) @@ -391,10 +391,10 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x4 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x5 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\): 0x4 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+30, 0x[0-9A-Fa-f]+40\): 0x0$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x4 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0x5 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+20, 
0x[0-9A-Fa-f]+30\\): 0x4 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+30, 0x[0-9A-Fa-f]+40\\): 0x0$" ], ) @@ -409,9 +409,9 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x6 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x6 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\): 0x4 \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x6 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0x6 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\\): 0x4 \\(mismatch\\)$" ], ) @@ -423,10 +423,10 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x3 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x3 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\): 0x3 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+30, 0x[0-9A-Fa-f]+40\): 0x0$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x3 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0x3 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\\): 0x3 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+30, 0x[0-9A-Fa-f]+40\\): 0x0$" ], ) @@ -452,8 +452,8 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_buf mte_buf+32 -f "x" -l 1 -s 16 --show-tags', patterns=[ - "0x[0-9A-Fa-f]+00: 0x0+ \(tag: 0x0\)\n" - "0x[0-9A-Fa-f]+10: 0x0+ \(tag: 0x1\)" + "0x[0-9A-Fa-f]+00: 0x0+ \\(tag: 0x0\\)\n" + "0x[0-9A-Fa-f]+10: 0x0+ \\(tag: 0x1\\)" ], ) @@ -461,13 +461,13 @@ def test_mte_memory_read_tag_display(self): # per line. 
self.expect( 'memory read mte_buf mte_buf+32 -f "x" -l 1 -s 32 --show-tags', - patterns=["0x[0-9A-Fa-f]+00: 0x0+ \(tags: 0x0 0x1\)\n"], + patterns=["0x[0-9A-Fa-f]+00: 0x0+ \\(tags: 0x0 0x1\\)\n"], ) # Reading half a granule still shows you the tag for that granule self.expect( 'memory read mte_buf mte_buf+8 -f "x" -l 1 -s 8 --show-tags', - patterns=["0x[0-9A-Fa-f]+00: 0x0+ \(tag: 0x0\)\n"], + patterns=["0x[0-9A-Fa-f]+00: 0x0+ \\(tag: 0x0\\)\n"], ) # We can read a whole number of granules but split them over more lines @@ -475,10 +475,10 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_buf+32 mte_buf+64 -f "x" -l 1 -s 8 --show-tags', patterns=[ - "0x[0-9A-Fa-f]+20: 0x0+ \(tag: 0x2\)\n" - "0x[0-9A-Fa-f]+28: 0x0+ \(tag: 0x2\)\n" - "0x[0-9A-Fa-f]+30: 0x0+ \(tag: 0x3\)\n" - "0x[0-9A-Fa-f]+38: 0x0+ \(tag: 0x3\)" + "0x[0-9A-Fa-f]+20: 0x0+ \\(tag: 0x2\\)\n" + "0x[0-9A-Fa-f]+28: 0x0+ \\(tag: 0x2\\)\n" + "0x[0-9A-Fa-f]+30: 0x0+ \\(tag: 0x3\\)\n" + "0x[0-9A-Fa-f]+38: 0x0+ \\(tag: 0x3\\)" ], ) @@ -488,10 +488,10 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_buf+32+8 mte_buf+64+8 -f "x" -l 1 -s 8 --show-tags', patterns=[ - "0x[0-9A-Fa-f]+28: 0x0+ \(tag: 0x2\)\n" - "0x[0-9A-Fa-f]+30: 0x0+ \(tag: 0x3\)\n" - "0x[0-9A-Fa-f]+38: 0x0+ \(tag: 0x3\)\n" - "0x[0-9A-Fa-f]+40: 0x0+ \(tag: 0x4\)" + "0x[0-9A-Fa-f]+28: 0x0+ \\(tag: 0x2\\)\n" + "0x[0-9A-Fa-f]+30: 0x0+ \\(tag: 0x3\\)\n" + "0x[0-9A-Fa-f]+38: 0x0+ \\(tag: 0x3\\)\n" + "0x[0-9A-Fa-f]+40: 0x0+ \\(tag: 0x4\\)" ], ) @@ -501,10 +501,10 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_buf+32+4 mte_buf+64+4 -f "x" -l 1 -s 8 --show-tags', patterns=[ - "0x[0-9A-Fa-f]+24: 0x0+ \(tag: 0x2\)\n" - "0x[0-9A-Fa-f]+2c: 0x0+ \(tags: 0x2 0x3\)\n" - "0x[0-9A-Fa-f]+34: 0x0+ \(tag: 0x3\)\n" - "0x[0-9A-Fa-f]+3c: 0x0+ \(tags: 0x3 0x4\)" + "0x[0-9A-Fa-f]+24: 0x0+ \\(tag: 0x2\\)\n" + "0x[0-9A-Fa-f]+2c: 0x0+ \\(tags: 0x2 0x3\\)\n" + "0x[0-9A-Fa-f]+34: 0x0+ \\(tag: 
0x3\\)\n" + "0x[0-9A-Fa-f]+3c: 0x0+ \\(tags: 0x3 0x4\\)" ], ) @@ -516,15 +516,17 @@ def test_mte_memory_read_tag_display(self): 'memory read mte_buf-16 mte_buf+32 -f "x" -l 1 -s 16 --show-tags', patterns=[ "0x[0-9A-Fa-f]+f0: 0x0+\n" - "0x[0-9A-Fa-f]+00: 0x0+ \(tag: 0x0\)\n" - "0x[0-9A-Fa-f]+10: 0x0+ \(tag: 0x1\)" + "0x[0-9A-Fa-f]+00: 0x0+ \\(tag: 0x0\\)\n" + "0x[0-9A-Fa-f]+10: 0x0+ \\(tag: 0x1\\)" ], ) # End of range is untagged self.expect( 'memory read mte_buf+page_size-16 mte_buf+page_size+16 -f "x" -l 1 -s 16 --show-tags', - patterns=["0x[0-9A-Fa-f]+f0: 0x0+ \(tag: 0xf\)\n" "0x[0-9A-Fa-f]+00: 0x0+"], + patterns=[ + "0x[0-9A-Fa-f]+f0: 0x0+ \\(tag: 0xf\\)\n" "0x[0-9A-Fa-f]+00: 0x0+" + ], ) # The smallest MTE range we can get is a single page so we just check @@ -533,8 +535,8 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_read_only-16 mte_read_only+page_size+16 -f "x" -l 1 -s 16 --force --show-tags', patterns=[ - "0x[0-9A-Fa-f]+f0: 0x0+\n" "0x[0-9A-Fa-f]+00: 0x0+ \(tag: 0x0\)\n", - "0x[0-9A-Fa-f]+f0: 0x0+ \(tag: 0x0\)\n" "0x[0-9A-Fa-f]+00: 0x0+", + "0x[0-9A-Fa-f]+f0: 0x0+\n" "0x[0-9A-Fa-f]+00: 0x0+ \\(tag: 0x0\\)\n", + "0x[0-9A-Fa-f]+f0: 0x0+ \\(tag: 0x0\\)\n" "0x[0-9A-Fa-f]+00: 0x0+", ], ) @@ -542,21 +544,21 @@ def test_mte_memory_read_tag_display(self): # is shown in where the tag would be, to keep the order intact. 
self.expect( 'memory read mte_buf-16 mte_buf+32 -f "x" -l 1 -s 32 --show-tags', - patterns=["0x[0-9A-Fa-f]+f0: 0x0+ \(tags: 0x0\)"], + patterns=[r"0x[0-9A-Fa-f]+f0: 0x0+ \(tags: 0x0\)"], ) self.expect( 'memory read mte_read_only+page_size-16 mte_read_only+page_size+16 -f "x" -l 1 -s 32 --show-tags', - patterns=["0x[0-9A-Fa-f]+f0: 0x0+ \(tags: 0x0 \)"], + patterns=[r"0x[0-9A-Fa-f]+f0: 0x0+ \(tags: 0x0 \)"], ) # Here the start address is unaligned so we cover 3 granules instead of 2 self.expect( 'memory read mte_buf-16+4 mte_buf+32+4 -f "x" -l 1 -s 32 --show-tags', - patterns=["0x[0-9A-Fa-f]+f4: 0x0+ \(tags: 0x0 0x1\)"], + patterns=[r"0x[0-9A-Fa-f]+f4: 0x0+ \(tags: 0x0 0x1\)"], ) self.expect( 'memory read mte_read_only+page_size-16+4 mte_read_only+page_size+16+4 -f "x" -l 1 -s 32 --show-tags', - patterns=["0x[0-9A-Fa-f]+f4: 0x0+ \(tags: 0x0 \)"], + patterns=[r"0x[0-9A-Fa-f]+f4: 0x0+ \(tags: 0x0 \)"], ) # Some formats call DumpDataExtractor multiple times, @@ -564,24 +566,24 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_buf mte_buf+32 -f "x" --show-tags', patterns=[ - "0x[0-9A-Fa-f]+00: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x0\)\n", - "0x[0-9A-Fa-f]+10: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x1\)", + "0x[0-9A-Fa-f]+00: 0x0+ 0x0+ 0x0+ 0x0+ \\(tag: 0x0\\)\n", + "0x[0-9A-Fa-f]+10: 0x0+ 0x0+ 0x0+ 0x0+ \\(tag: 0x1\\)", ], ) self.expect( 'memory read mte_buf mte_buf+32 -f "bytes with ASCII" --show-tags', patterns=[ - "0x[0-9A-Fa-f]+00: (00 ){16} \.{16} \(tag: 0x0\)\n", - "0x[0-9A-Fa-f]+10: (00 ){16} \.{16} \(tag: 0x1\)", + "0x[0-9A-Fa-f]+00: (00 ){16} \\.{16} \\(tag: 0x0\\)\n", + "0x[0-9A-Fa-f]+10: (00 ){16} \\.{16} \\(tag: 0x1\\)", ], ) self.expect( 'memory read mte_buf mte_buf+32 -f "uint8_t[]" -s 16 -l 1 --show-tags', patterns=[ - "0x[0-9A-Fa-f]+00: \{(0x00 ){15}0x00\} \(tag: 0x0\)\n" - "0x[0-9A-Fa-f]+10: \{(0x00 ){15}0x00\} \(tag: 0x1\)" + "0x[0-9A-Fa-f]+00: \\{(0x00 ){15}0x00\\} \\(tag: 0x0\\)\n" + "0x[0-9A-Fa-f]+10: \\{(0x00 ){15}0x00\\} \\(tag: 0x1\\)" ], ) 
@@ -594,12 +596,12 @@ def test_mte_memory_read_tag_display_repeated(self): self.expect( 'memory read mte_buf mte_buf+16 -f "x" --show-tags', - patterns=["0x[0-9A-fa-f]+00: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x0\)"], + patterns=[r"0x[0-9A-fa-f]+00: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x0\)"], ) # Equivalent to just pressing enter on the command line. self.expect( "memory read", - patterns=["0x[0-9A-fa-f]+10: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x1\)"], + patterns=[r"0x[0-9A-fa-f]+10: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x1\)"], ) # You can add the argument to an existing repetition without resetting @@ -613,10 +615,10 @@ def test_mte_memory_read_tag_display_repeated(self): # Note that the formatting returns to default here. self.expect( "memory read --show-tags", - patterns=["0x[0-9A-fa-f]+20: (00 )+ \.+ \(tag: 0x2\)"], + patterns=[r"0x[0-9A-fa-f]+20: (00 )+ \.+ \(tag: 0x2\)"], ) self.expect( - "memory read", patterns=["0x[0-9A-fa-f]+30: (00 )+ \.+ \(tag: 0x3\)"] + "memory read", patterns=[r"0x[0-9A-fa-f]+30: (00 )+ \.+ \(tag: 0x3\)"] ) # A fresh command reverts to the default of tags being off. 
@@ -641,8 +643,8 @@ def test_mte_memory_find(self): cmd = 'memory find -s "LLDB" mte_buf+64 mte_buf+512' found_pattern = "data found at location: 0x[0-9A-Fa-f]+80" results_patterns = [ - "0x[0-9A-Fa-f]+80: 4c 4c 44 42 (00 )+ LLDB\.+", - "0x[0-9A-Fa-f]+90: 00 00 00 00 (00 )+ \.+", + r"0x[0-9A-Fa-f]+80: 4c 4c 44 42 (00 )+ LLDB\.+", + r"0x[0-9A-Fa-f]+90: 00 00 00 00 (00 )+ \.+", ] # Default is not to show tags @@ -651,8 +653,8 @@ def test_mte_memory_find(self): cmd + " --show-tags", patterns=[ found_pattern, - results_patterns[0] + " \(tag: 0x8\)", - results_patterns[1] + " \(tag: 0x9\)", + results_patterns[0] + r" \(tag: 0x8\)", + results_patterns[1] + r" \(tag: 0x9\)", ], ) @@ -661,7 +663,7 @@ def test_mte_memory_find(self): 'memory find -s "DB" mte_buf+64 mte_buf+512 --show-tags', patterns=[ "data found at location: 0x[0-9A-Fa-f]+82\n" - "0x[0-9A-Fa-f]+82: 44 42 (00 )+ DB\.+ \(tags: 0x8 0x9\)\n", - "0x[0-9A-Fa-f]+92: 00 00 (00 )+ ..\.+ \(tags: 0x9 0xa\)", + "0x[0-9A-Fa-f]+82: 44 42 (00 )+ DB\\.+ \\(tags: 0x8 0x9\\)\n", + "0x[0-9A-Fa-f]+92: 00 00 (00 )+ ..\\.+ \\(tags: 0x9 0xa\\)", ], ) diff --git a/lldb/test/API/linux/aarch64/mte_tag_faults/TestAArch64LinuxMTEMemoryTagFaults.py b/lldb/test/API/linux/aarch64/mte_tag_faults/TestAArch64LinuxMTEMemoryTagFaults.py index 2d6470505cf7c..331c32749e32c 100644 --- a/lldb/test/API/linux/aarch64/mte_tag_faults/TestAArch64LinuxMTEMemoryTagFaults.py +++ b/lldb/test/API/linux/aarch64/mte_tag_faults/TestAArch64LinuxMTEMemoryTagFaults.py @@ -50,9 +50,9 @@ def test_mte_tag_fault_sync(self): self.expect( "continue", patterns=[ - "\* thread #1, name = 'a.out', stop reason = signal SIGSEGV: " - "sync tag check fault \(fault address=0x9[0-9A-Fa-f]+11\ " - "logical tag=0x9 allocation tag=0xa\)" + r"\* thread #1, name = 'a.out', stop reason = signal SIGSEGV: " + r"sync tag check fault \(fault address=0x9[0-9A-Fa-f]+11\ " + r"logical tag=0x9 allocation tag=0xa\)" ], ) diff --git 
a/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py b/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py index 8eadd65466a49..509dae3aed855 100644 --- a/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py +++ b/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py @@ -39,7 +39,8 @@ def test_mte_regions(self): # Despite the non address bits we should find a region self.expect( - "memory region the_page", patterns=["\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"] + "memory region the_page", + patterns=[r"\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"], ) # Check that the usual error message is displayed after repeating @@ -68,5 +69,6 @@ def test_mte_regions(self): # This should not error, since the user supplied address overrides # the previous end address. self.expect( - "memory region the_page", patterns=["\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"] + "memory region the_page", + patterns=[r"\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"], ) diff --git a/lldb/test/API/macosx/add-dsym/TestAddDsymDownload.py b/lldb/test/API/macosx/add-dsym/TestAddDsymDownload.py index 52055f250a584..8ff72c9a74edd 100644 --- a/lldb/test/API/macosx/add-dsym/TestAddDsymDownload.py +++ b/lldb/test/API/macosx/add-dsym/TestAddDsymDownload.py @@ -6,7 +6,7 @@ @skipUnlessDarwin class AddDsymDownload(TestBase): - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") def get_uuid(self): dwarfdump_cmd_output = subprocess.check_output( diff --git a/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py b/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py index db3074d7e7942..9309de4824ec4 100644 --- a/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py +++ b/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py @@ 
-285,7 +285,7 @@ def test_lc_note_main_bin_spec_os_plugin(self): for l in python_init: writer.write(l + "\n") - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") dwarfdump_cmd_output = subprocess.check_output( ('/usr/bin/dwarfdump --uuid "%s"' % aout_exe), shell=True ).decode("utf-8") diff --git a/lldb/test/API/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py b/lldb/test/API/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py index d4366196c53c3..a3f9144572da5 100644 --- a/lldb/test/API/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py +++ b/lldb/test/API/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py @@ -32,7 +32,7 @@ def test_lc_note(self): lambda: os.environ.pop("LLDB_APPLE_DSYMFORUUID_EXECUTABLE", None) ) - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") dwarfdump_cmd_output = subprocess.check_output( ('/usr/bin/dwarfdump --uuid "%s"' % self.test_exe), shell=True ).decode("utf-8") diff --git a/lldb/test/API/macosx/lc-note/multiple-binary-corefile/TestMultipleBinaryCorefile.py b/lldb/test/API/macosx/lc-note/multiple-binary-corefile/TestMultipleBinaryCorefile.py index 897eab23e05e2..28472cb787d4a 100644 --- a/lldb/test/API/macosx/lc-note/multiple-binary-corefile/TestMultipleBinaryCorefile.py +++ b/lldb/test/API/macosx/lc-note/multiple-binary-corefile/TestMultipleBinaryCorefile.py @@ -107,7 +107,7 @@ def test_corefile_binaries_dsymforuuid(self): ) ) - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") dwarfdump_cmd_output = subprocess.check_output( ('/usr/bin/dwarfdump --uuid "%s"' % self.libtwo_exe), shell=True ).decode("utf-8") diff --git a/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py 
b/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py index a69f7a055c79b..faf2256b03a0d 100644 --- a/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py +++ b/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py @@ -72,7 +72,7 @@ def run_with(self, arch, os, vers, env, expected_load_command): self, "break here", lldb.SBFileSpec("hello.c") ) triple_re = "-".join([arch, "apple", os + vers + ".*"] + env_list) - self.expect("image list -b -t", patterns=["a\.out " + triple_re]) + self.expect("image list -b -t", patterns=[r"a\.out " + triple_re]) self.check_debugserver(log, os + env, vers) @skipIfAsan diff --git a/lldb/test/API/macosx/skinny-corefile/TestSkinnyCorefile.py b/lldb/test/API/macosx/skinny-corefile/TestSkinnyCorefile.py index 02ab856aabc6b..bc19c69df7620 100644 --- a/lldb/test/API/macosx/skinny-corefile/TestSkinnyCorefile.py +++ b/lldb/test/API/macosx/skinny-corefile/TestSkinnyCorefile.py @@ -42,7 +42,7 @@ def test_lc_note(self): lambda: os.environ.pop("LLDB_APPLE_DSYMFORUUID_EXECUTABLE", None) ) - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") dwarfdump_cmd_output = subprocess.check_output( ('/usr/bin/dwarfdump --uuid "%s"' % self.aout_exe), shell=True ).decode("utf-8") diff --git a/lldb/test/API/python_api/address_range/TestAddressRange.py b/lldb/test/API/python_api/address_range/TestAddressRange.py index ae4b8c7c90ce4..3564022702497 100644 --- a/lldb/test/API/python_api/address_range/TestAddressRange.py +++ b/lldb/test/API/python_api/address_range/TestAddressRange.py @@ -191,7 +191,7 @@ def test_address_range_print_resolved(self): interp.HandleCommand(script, result, False) self.assertTrue(result.Succeeded(), "script command succeeded") # [0x1000-0x2000] // Resolved with target or addresses without sections - self.assertRegex(result.GetOutput(), "^\[0x[0-9a-f]+\-0x[0-9a-f]+\)") + self.assertRegex(result.GetOutput(), 
r"^\[0x[0-9a-f]+\-0x[0-9a-f]+\)") process.Kill() def test_address_range_print_no_section_resolved(self): @@ -215,7 +215,7 @@ def test_address_range_print_no_section_resolved(self): range_str = str(range) # [0x1000-0x2000] // Resolved with target or addresses without sections - self.assertRegex(range_str, "^\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + self.assertRegex(range_str, r"^\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") process.Kill() def test_address_range_print_not_resolved(self): @@ -223,7 +223,7 @@ def test_address_range_print_not_resolved(self): range = lldb.SBAddressRange(self.addr1, 8) range_str = str(range) # a.out[0x1000-0x2000] // Without target - self.assertRegex(range_str, "^a.out\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + self.assertRegex(range_str, r"^a.out\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") def test_address_range_list_print(self): """Make sure the SBAddressRangeList can be printed.""" diff --git a/lldb/test/API/python_api/target-arch-from-module/TestTargetArchFromModule.py b/lldb/test/API/python_api/target-arch-from-module/TestTargetArchFromModule.py index 260f37c9ed230..0141828ae1eab 100644 --- a/lldb/test/API/python_api/target-arch-from-module/TestTargetArchFromModule.py +++ b/lldb/test/API/python_api/target-arch-from-module/TestTargetArchFromModule.py @@ -34,7 +34,7 @@ def test_target_arch_init(self): lambda: os.environ.pop("LLDB_APPLE_DSYMFORUUID_EXECUTABLE", None) ) - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") dwarfdump_cmd_output = subprocess.check_output( ('/usr/bin/dwarfdump --uuid "%s"' % aout_exe), shell=True ).decode("utf-8") diff --git a/lldb/test/API/source-manager/TestSourceManager.py b/lldb/test/API/source-manager/TestSourceManager.py index 7d9ce86cdc353..1283c73e152a9 100644 --- a/lldb/test/API/source-manager/TestSourceManager.py +++ b/lldb/test/API/source-manager/TestSourceManager.py @@ -256,7 +256,7 @@ def test_modify_source_file_while_debugging(self): # of 
breakpoints for the current line, i.e., self.line. import re - m = re.search("^\[(\d+)\].*// Set break point at this line.", output) + m = re.search(r"^\[(\d+)\].*// Set break point at this line.", output) if not m: self.fail("Fail to display source level breakpoints") self.assertGreater(int(m.group(1)), 0) diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py index e143c2798b209..9df44cc454d5d 100644 --- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py +++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py @@ -44,7 +44,7 @@ def test_by_pid(self): """ Tests attaching to a process by process ID. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") self.process = subprocess.Popen( [program], @@ -60,7 +60,7 @@ def test_by_name(self): """ Tests attaching to a process by process name. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() orig_program = self.getBuildArtifact("a.out") # Since we are going to attach by process name, we need a unique # process name that has minimal chance to match a process that is @@ -101,7 +101,7 @@ def test_by_name_waitFor(self): next instance of a process to be launched, ingoring all current ones. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") self.spawn_thread = threading.Thread( target=spawn_and_wait, @@ -137,7 +137,7 @@ def test_commands(self): "terminateCommands" are a list of LLDB commands that get executed when the debugger session terminates. 
""" - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") # Here we just create a target and launch the process as a way to test # if we are able to use attach commands to create any kind of a target @@ -211,7 +211,7 @@ def test_terminate_commands(self): Tests that the "terminateCommands", that can be passed during attach, are run when the debugger is disconnected. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") # Here we just create a target and launch the process as a way to test # if we are able to use attach commands to create any kind of a target diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py index fbabc857bd0e0..9024120c868fd 100644 --- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py +++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py @@ -60,7 +60,7 @@ def test_by_port(self): """ Tests attaching to a process by port. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") debug_server_tool = self.getBuiltinDebugServerTool() @@ -92,7 +92,7 @@ def test_by_port_and_pid(self): """ Tests attaching to a process by process ID and port number. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") # It is not necessary to launch "lldb-server" to obtain the actual port and pid for attaching. @@ -120,7 +120,7 @@ def test_by_invalid_port(self): """ Tests attaching to a process by invalid port number 0. 
""" - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") port = 0 @@ -139,7 +139,7 @@ def test_by_illegal_port(self): """ Tests attaching to a process by illegal/greater port number 65536 """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") port = 65536 diff --git a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py index a20384b75f5c0..11573eba06907 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py @@ -41,7 +41,7 @@ def test_breakpoint_events(self): foo_bp1_line = line_number("foo.cpp", "foo breakpoint 1") foo_bp2_line = line_number("foo.cpp", "foo breakpoint 2") - # Visual Studio Code Debug Adaptors have no way to specify the file + # Visual Studio Code Debug Adapters have no way to specify the file # without launching or attaching to a process, so we must start a # process in order to be able to set breakpoints. program = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py index c62feda64a125..26df2573555df 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py @@ -27,7 +27,7 @@ def test_source_map(self): with the corresponding source maps to have breakpoints and frames working. 
""" - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() other_basename = "other-copy.c" other_path = self.getBuildArtifact(other_basename) @@ -100,7 +100,7 @@ def test_source_map(self): @skipIfWindows def test_set_and_clear(self): """Tests setting and clearing source file and line breakpoints. - This packet is a bit tricky on the debug adaptor side since there + This packet is a bit tricky on the debug adapter side since there is no "clearBreakpoints" packet. Source file and line breakpoints are set by sending a "setBreakpoints" packet with a source file specified and zero or more source lines. If breakpoints have been @@ -116,7 +116,7 @@ def test_set_and_clear(self): third_line = line_number("main.cpp", "break 14") lines = [first_line, third_line, second_line] - # Visual Studio Code Debug Adaptors have no way to specify the file + # Visual Studio Code Debug Adapters have no way to specify the file # without launching or attaching to a process, so we must start a # process in order to be able to set breakpoints. program = self.getBuildArtifact("a.out") @@ -257,7 +257,7 @@ def test_clear_breakpoints_unset_breakpoints(self): line_number("main.cpp", "break 13"), ] - # Visual Studio Code Debug Adaptors have no way to specify the file + # Visual Studio Code Debug Adapters have no way to specify the file # without launching or attaching to a process, so we must start a # process in order to be able to set breakpoints. 
program = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py index b2ab12e51bf68..92ac66cd44c5d 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py @@ -14,7 +14,7 @@ class TestDAP_setExceptionBreakpoints(lldbdap_testcase.DAPTestCaseBase): @skipIfWindows def test_functionality(self): """Tests setting and clearing exception breakpoints. - This packet is a bit tricky on the debug adaptor side since there + This packet is a bit tricky on the debug adapter side since there is no "clear exception breakpoints" packet. Exception breakpoints are set by sending a "setExceptionBreakpoints" packet with zero or more exception filters. If exception breakpoints have been set @@ -26,7 +26,7 @@ def test_functionality(self): and the functionality of each breakpoint, like 'conditions' and x'hitCondition' settings. """ - # Visual Studio Code Debug Adaptors have no way to specify the file + # Visual Studio Code Debug Adapters have no way to specify the file # without launching or attaching to a process, so we must start a # process in order to be able to set breakpoints. program = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py index 8f00f42574b56..946595f639edc 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py @@ -14,7 +14,7 @@ class TestDAP_setFunctionBreakpoints(lldbdap_testcase.DAPTestCaseBase): @skipIfWindows def test_set_and_clear(self): """Tests setting and clearing function breakpoints. 
- This packet is a bit tricky on the debug adaptor side since there + This packet is a bit tricky on the debug adapter side since there is no "clearFunction Breakpoints" packet. Function breakpoints are set by sending a "setFunctionBreakpoints" packet with zero or more function names. If function breakpoints have been set before, @@ -25,7 +25,7 @@ def test_set_and_clear(self): correctly. It doesn't test hitting breakpoints and the functionality of each breakpoint, like 'conditions' and 'hitCondition' settings. """ - # Visual Studio Code Debug Adaptors have no way to specify the file + # Visual Studio Code Debug Adapters have no way to specify the file # without launching or attaching to a process, so we must start a # process in order to be able to set breakpoints. program = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py b/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py index e4cf903fc0d11..25ecbb5cf106b 100644 --- a/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py +++ b/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py @@ -75,7 +75,7 @@ def test_command_directive_abort_on_error_attach_commands(self): "settings set target.show-hex-variable-values-with-leading-zeroes false" ) command_abort_on_error = "settings set foo bar" - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() self.attach( program, attachCommands=["?!" + command_quiet, "!" 
+ command_abort_on_error], diff --git a/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py b/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py index 5189435185607..1896acea15a99 100644 --- a/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py +++ b/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py @@ -18,7 +18,7 @@ def test_core_file(self): exe_file = os.path.join(current_dir, "linux-x86_64.out") core_file = os.path.join(current_dir, "linux-x86_64.core") - self.create_debug_adaptor() + self.create_debug_adapter() self.attach(exe_file, coreFile=core_file) expected_frames = [ @@ -64,7 +64,7 @@ def test_core_file_source_mapping_array(self): exe_file = os.path.join(current_dir, "linux-x86_64.out") core_file = os.path.join(current_dir, "linux-x86_64.core") - self.create_debug_adaptor() + self.create_debug_adapter() source_map = [["/home/labath/test", current_dir]] self.attach(exe_file, coreFile=core_file, sourceMap=source_map) @@ -78,7 +78,7 @@ def test_core_file_source_mapping_object(self): exe_file = os.path.join(current_dir, "linux-x86_64.out") core_file = os.path.join(current_dir, "linux-x86_64.core") - self.create_debug_adaptor() + self.create_debug_adapter() source_map = {"/home/labath/test": current_dir} self.attach(exe_file, coreFile=core_file, sourceMap=source_map) diff --git a/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py b/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py index f9e461adecb15..0cb792d662a80 100644 --- a/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py +++ b/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py @@ -52,7 +52,7 @@ def test_attach(self): before the file is created, and as the process is not terminated upon disconnection, the file is created anyway. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") # Use a file as a synchronization point between test and inferior. 
diff --git a/lldb/test/API/tools/lldb-dap/extendedStackTrace/TestDAP_extendedStackTrace.py b/lldb/test/API/tools/lldb-dap/extendedStackTrace/TestDAP_extendedStackTrace.py index ba35ea9e0b64e..f6b613da964b8 100644 --- a/lldb/test/API/tools/lldb-dap/extendedStackTrace/TestDAP_extendedStackTrace.py +++ b/lldb/test/API/tools/lldb-dap/extendedStackTrace/TestDAP_extendedStackTrace.py @@ -66,11 +66,11 @@ def test_stackTrace(self): self.assertEqual(len(stackLabels), 2, "expected two label stack frames") self.assertRegex( stackLabels[0][1]["name"], - "Enqueued from com.apple.root.default-qos \(Thread \d\)", + r"Enqueued from com.apple.root.default-qos \(Thread \d\)", ) self.assertRegex( stackLabels[1][1]["name"], - "Enqueued from com.apple.main-thread \(Thread \d\)", + r"Enqueued from com.apple.main-thread \(Thread \d\)", ) for i, frame in stackLabels: diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py index 7898d01457afc..0c92e5bff07c6 100644 --- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py +++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py @@ -32,7 +32,7 @@ def test_termination(self): Tests the correct termination of lldb-dap upon a 'disconnect' request. """ - self.create_debug_adaptor() + self.create_debug_adapter() # The underlying lldb-dap process must be alive self.assertEqual(self.dap_server.process.poll(), None) @@ -92,7 +92,7 @@ def test_cwd(self): def test_debuggerRoot(self): """ Tests the "debuggerRoot" will change the working directory of - the lldb-dap debug adaptor. + the lldb-dap debug adapter. 
""" program = self.getBuildArtifact("a.out") program_parent_dir = os.path.realpath(os.path.dirname(os.path.dirname(program))) @@ -376,7 +376,7 @@ def test_extra_launch_commands(self): """ Tests the "launchCommands" with extra launching settings """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") source = "main.c" @@ -440,7 +440,7 @@ def test_failing_launch_commands(self): """ Tests "launchCommands" failures prevents a launch. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") # Run an invalid launch command, in this case a bad path. @@ -483,7 +483,7 @@ def test_terminate_commands(self): Tests that the "terminateCommands", that can be passed during launch, are run when the debugger is disconnected. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") terminateCommands = ["expr 4+2"] diff --git a/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py index ac96bcc1364a2..9141565ac1b9b 100644 --- a/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py +++ b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py @@ -118,7 +118,7 @@ def test_runInTerminalWithObjectEnv(self): def test_runInTerminalInvalidTarget(self): if not self.isTestSupported(): return - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() response = self.launch( "INVALIDPROGRAM", runInTerminal=True, @@ -247,4 +247,4 @@ def test_NonAttachedRunInTerminalLauncher(self): self.readPidMessage(comm_file) _, stderr = proc.communicate() - self.assertIn("Timed out trying to get messages from the debug adaptor", stderr) + self.assertIn("Timed out trying to get messages from the debug adapter", stderr) diff --git 
a/lldb/test/API/tools/lldb-dap/server/TestDAP_server.py b/lldb/test/API/tools/lldb-dap/server/TestDAP_server.py index 1f562e989533a..7a9a4f434e04b 100644 --- a/lldb/test/API/tools/lldb-dap/server/TestDAP_server.py +++ b/lldb/test/API/tools/lldb-dap/server/TestDAP_server.py @@ -15,7 +15,7 @@ class TestDAP_server(lldbdap_testcase.DAPTestCaseBase): def start_server(self, connection): log_file_path = self.getBuildArtifact("dap.txt") - (process, connection) = dap_server.DebugAdaptorServer.launch( + (process, connection) = dap_server.DebugAdapterServer.launch( executable=self.lldbDAPExec, connection=connection, log_file=log_file_path, @@ -29,7 +29,7 @@ def cleanup(): return (process, connection) def run_debug_session(self, connection, name): - self.dap_server = dap_server.DebugAdaptorServer( + self.dap_server = dap_server.DebugAdapterServer( connection=connection, ) program = self.getBuildArtifact("a.out") @@ -83,7 +83,7 @@ def test_server_interrupt(self): """ self.build() (process, connection) = self.start_server(connection="tcp://localhost:0") - self.dap_server = dap_server.DebugAdaptorServer( + self.dap_server = dap_server.DebugAdapterServer( connection=connection, ) program = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py index 580ad38ab51c1..fde66a28382c7 100644 --- a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py +++ b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py @@ -113,7 +113,7 @@ def darwin_dwarf_missing_obj(self, initCommands): # error when we run to main and try to get variables os.unlink(main_obj) - self.create_debug_adaptor() + self.create_debug_adapter() self.assertTrue(os.path.exists(program), "executable must exist") self.launch(program=program, initCommands=initCommands) diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py index 
132072547e207..c8055f9b6320c 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py @@ -42,7 +42,7 @@ def test_module_info(self): context = self.expect_gdbremote_sequence() spec = context.get("spec") self.assertRegex(spec, '"file_path":".*"') - self.assertRegex(spec, '"file_offset":\d+') - self.assertRegex(spec, '"file_size":\d+') - self.assertRegex(spec, '"triple":"\w*-\w*-.*"') + self.assertRegex(spec, r'"file_offset":\d+') + self.assertRegex(spec, r'"file_size":\d+') + self.assertRegex(spec, r'"triple":"\w*-\w*-.*"') self.assertRegex(spec, '"uuid":"[A-Fa-f0-9]+"') diff --git a/lldb/test/API/tools/lldb-server/TestPtyServer.py b/lldb/test/API/tools/lldb-server/TestPtyServer.py index 345f68f6d87d3..c3cb1706cffd1 100644 --- a/lldb/test/API/tools/lldb-server/TestPtyServer.py +++ b/lldb/test/API/tools/lldb-server/TestPtyServer.py @@ -65,7 +65,7 @@ def test_pty_server(self): "read packet: $qXfer:features:read:target.xml:0,200000#00", { "direction": "send", - "regex": re.compile("^\$l(.+)#[0-9a-fA-F]{2}$", flags=re.DOTALL), + "regex": re.compile(r"^\$l(.+)#[0-9a-fA-F]{2}$", flags=re.DOTALL), "capture": {1: "target_xml"}, }, ], diff --git a/lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py b/lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py index bd78a83c65655..c1a92eeb5d5d1 100644 --- a/lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py +++ b/lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py @@ -24,7 +24,7 @@ def test_g_target_xml_returns_correct_data(self): ), { "direction": "send", - "regex": re.compile("^\$l(.+)#[0-9a-fA-F]{2}$", flags=re.DOTALL), + "regex": re.compile(r"^\$l(.+)#[0-9a-fA-F]{2}$", flags=re.DOTALL), "capture": {1: "target_xml"}, }, ], diff --git a/lldb/test/API/types/AbstractBase.py 
b/lldb/test/API/types/AbstractBase.py index 043a5510c62f2..fb1e25254b281 100644 --- a/lldb/test/API/types/AbstractBase.py +++ b/lldb/test/API/types/AbstractBase.py @@ -20,7 +20,7 @@ def Msg(var, val, using_frame_variable): class GenericTester(TestBase): # This is the pattern by design to match the " var = 'value'" output from # printf() stmts (see basic_type.cpp). - pattern = re.compile(" (\*?a[^=]*) = '([^=]*)'$") + pattern = re.compile(r" (\*?a[^=]*) = '([^=]*)'$") # Assert message. DATA_TYPE_GROKKED = "Data type from expr parser output is parsed correctly" @@ -205,7 +205,7 @@ def generic_type_tester( # output: (char) a_array_bounded[0] = 'a' # try: - dt = re.match("^\((.*)\)", output).group(1) + dt = re.match(r"^\((.*)\)", output).group(1) except: self.fail(self.DATA_TYPE_GROKKED) @@ -284,7 +284,7 @@ def generic_type_expr_tester( # output: (double) $0 = 1100.12 # try: - dt = re.match("^\((.*)\) \$[0-9]+ = ", output).group(1) + dt = re.match(r"^\((.*)\) \$[0-9]+ = ", output).group(1) except: self.fail(self.DATA_TYPE_GROKKED) diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index cd53e2aca3fb6..53c514b790f38 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -64,7 +64,7 @@ namespace lldb_dap { DAP::DAP(std::string name, llvm::StringRef path, std::ofstream *log, lldb::IOObjectSP input, lldb::IOObjectSP output, ReplMode repl_mode, std::vector pre_init_commands) - : name(std::move(name)), debug_adaptor_path(path), log(log), + : name(std::move(name)), debug_adapter_path(path), log(log), input(std::move(input)), output(std::move(output)), broadcaster("lldb-dap"), exception_breakpoints(), pre_init_commands(std::move(pre_init_commands)), diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h index a7c7e5d9bbc19..8b2e498a28c95 100644 --- a/lldb/tools/lldb-dap/DAP.h +++ b/lldb/tools/lldb-dap/DAP.h @@ -146,7 +146,7 @@ struct SendEventRequestHandler : public lldb::SBCommandPluginInterface { struct DAP { std::string 
name; - llvm::StringRef debug_adaptor_path; + llvm::StringRef debug_adapter_path; std::ofstream *log; InputStream input; OutputStream output; diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp index 0a32e39ea3aff..606ada90ce2e5 100644 --- a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp @@ -100,7 +100,7 @@ static llvm::Error RunInTerminal(DAP &dap, debugger_pid = getpid(); #endif llvm::json::Object reverse_request = CreateRunInTerminalReverseRequest( - launch_request, dap.debug_adaptor_path, comm_file.m_path, debugger_pid); + launch_request, dap.debug_adapter_path, comm_file.m_path, debugger_pid); dap.SendReverseRequest("runInTerminal", std::move(reverse_request)); diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index 9f08efb2a3ac1..9dec4ca1df49a 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -1436,7 +1436,7 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit) { /// https://microsoft.github.io/debug-adapter-protocol/specification#Reverse_Requests_RunInTerminal llvm::json::Object CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, - llvm::StringRef debug_adaptor_path, + llvm::StringRef debug_adapter_path, llvm::StringRef comm_file, lldb::pid_t debugger_pid) { llvm::json::Object run_in_terminal_args; @@ -1446,7 +1446,7 @@ CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, const auto *launch_request_arguments = launch_request.getObject("arguments"); // The program path must be the first entry in the "args" field - std::vector args = {debug_adaptor_path.str(), "--comm-file", + std::vector args = {debug_adapter_path.str(), "--comm-file", comm_file.str()}; if (debugger_pid != LLDB_INVALID_PROCESS_ID) { args.push_back("--debugger-pid"); diff --git a/lldb/tools/lldb-dap/JSONUtils.h b/lldb/tools/lldb-dap/JSONUtils.h index 
db56d98777347..55d2360e0a224 100644 --- a/lldb/tools/lldb-dap/JSONUtils.h +++ b/lldb/tools/lldb-dap/JSONUtils.h @@ -233,7 +233,7 @@ void AppendBreakpoint( std::optional request_path = std::nullopt, std::optional request_line = std::nullopt); -/// Converts breakpoint location to a debug adaptor protocol "Breakpoint". +/// Converts breakpoint location to a debug adapter protocol "Breakpoint". /// /// \param[in] bp /// A LLDB breakpoint object to convert into a JSON value @@ -290,7 +290,7 @@ llvm::json::Value CreateModule(lldb::SBTarget &target, lldb::SBModule &module); llvm::json::Object CreateEventObject(const llvm::StringRef event_name); /// Create a "ExceptionBreakpointsFilter" JSON object as described in -/// the debug adaptor definition. +/// the debug adapter definition. /// /// \param[in] bp /// The exception breakpoint object to use @@ -301,7 +301,7 @@ llvm::json::Object CreateEventObject(const llvm::StringRef event_name); llvm::json::Value CreateExceptionBreakpointFilter(const ExceptionBreakpoint &bp); -/// Create a "Scope" JSON object as described in the debug adaptor definition. +/// Create a "Scope" JSON object as described in the debug adapter definition. /// /// \param[in] name /// The value to place into the "name" key @@ -322,7 +322,7 @@ llvm::json::Value CreateScope(const llvm::StringRef name, int64_t variablesReference, int64_t namedVariables, bool expensive); -/// Create a "Source" JSON object as described in the debug adaptor definition. +/// Create a "Source" JSON object as described in the debug adapter definition. /// /// \param[in] file /// The SBFileSpec to use when populating out the "Source" object @@ -332,7 +332,7 @@ llvm::json::Value CreateScope(const llvm::StringRef name, /// definition outlined by Microsoft. llvm::json::Value CreateSource(const lldb::SBFileSpec &file); -/// Create a "Source" JSON object as described in the debug adaptor definition. +/// Create a "Source" JSON object as described in the debug adapter definition. 
/// /// \param[in] line_entry /// The LLDB line table to use when populating out the "Source" @@ -573,8 +573,8 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit); /// The original launch_request object whose fields are used to construct /// the reverse request object. /// -/// \param[in] debug_adaptor_path -/// Path to the current debug adaptor. It will be used to delegate the +/// \param[in] debug_adapter_path +/// Path to the current debug adapter. It will be used to delegate the /// launch of the target. /// /// \param[in] comm_file @@ -590,7 +590,7 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit); /// Microsoft. llvm::json::Object CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, - llvm::StringRef debug_adaptor_path, + llvm::StringRef debug_adapter_path, llvm::StringRef comm_file, lldb::pid_t debugger_pid); diff --git a/lldb/tools/lldb-dap/Options.td b/lldb/tools/lldb-dap/Options.td index 97a6ec118c47b..a1baf2f0370bd 100644 --- a/lldb/tools/lldb-dap/Options.td +++ b/lldb/tools/lldb-dap/Options.td @@ -33,7 +33,7 @@ def launch_target: S<"launch-target">, def comm_file: S<"comm-file">, MetaVarName<"">, - HelpText<"The fifo file used to communicate the with the debug adaptor " + HelpText<"The fifo file used to communicate the with the debug adapter " "when using --launch-target.">; def debugger_pid: S<"debugger-pid">, diff --git a/lldb/tools/lldb-dap/RunInTerminal.cpp b/lldb/tools/lldb-dap/RunInTerminal.cpp index 4fe09e2885a8e..9f309dd78221a 100644 --- a/lldb/tools/lldb-dap/RunInTerminal.cpp +++ b/lldb/tools/lldb-dap/RunInTerminal.cpp @@ -97,9 +97,9 @@ static Error ToError(const RunInTerminalMessage &message) { RunInTerminalLauncherCommChannel::RunInTerminalLauncherCommChannel( StringRef comm_file) - : m_io(comm_file, "debug adaptor") {} + : m_io(comm_file, "debug adapter") {} -Error RunInTerminalLauncherCommChannel::WaitUntilDebugAdaptorAttaches( +Error 
RunInTerminalLauncherCommChannel::WaitUntilDebugAdapterAttaches( std::chrono::milliseconds timeout) { if (Expected message = GetNextMessage(m_io, timeout)) { diff --git a/lldb/tools/lldb-dap/RunInTerminal.h b/lldb/tools/lldb-dap/RunInTerminal.h index b20f8beb6071d..457850c8ea538 100644 --- a/lldb/tools/lldb-dap/RunInTerminal.h +++ b/lldb/tools/lldb-dap/RunInTerminal.h @@ -72,7 +72,7 @@ class RunInTerminalLauncherCommChannel { public: RunInTerminalLauncherCommChannel(llvm::StringRef comm_file); - /// Wait until the debug adaptor attaches. + /// Wait until the debug adapter attaches. /// /// \param[in] timeout /// How long to wait to be attached. @@ -80,16 +80,16 @@ class RunInTerminalLauncherCommChannel { /// \return /// An \a llvm::Error object in case of errors or if this operation times /// out. - llvm::Error WaitUntilDebugAdaptorAttaches(std::chrono::milliseconds timeout); + llvm::Error WaitUntilDebugAdapterAttaches(std::chrono::milliseconds timeout); - /// Notify the debug adaptor this process' pid. + /// Notify the debug adapter this process' pid. /// /// \return /// An \a llvm::Error object in case of errors or if this operation times /// out. llvm::Error NotifyPid(); - /// Notify the debug adaptor that there's been an error. + /// Notify the debug adapter that there's been an error. void NotifyError(llvm::StringRef error); private: @@ -122,7 +122,7 @@ class RunInTerminalDebugAdapterCommChannel { FifoFileIO m_io; }; -/// Create a fifo file used to communicate the debug adaptor with +/// Create a fifo file used to communicate the debug adapter with /// the runInTerminal launcher. 
llvm::Expected> CreateRunInTerminalCommFile(); diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 6dff960daede9..d005eccfae903 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -175,22 +175,22 @@ static void PrintHelp(LLDBDAPOptTable &table, llvm::StringRef tool_name) { // If --launch-target is provided, this instance of lldb-dap becomes a // runInTerminal launcher. It will ultimately launch the program specified in // the --launch-target argument, which is the original program the user wanted -// to debug. This is done in such a way that the actual debug adaptor can +// to debug. This is done in such a way that the actual debug adapter can // place breakpoints at the beginning of the program. // -// The launcher will communicate with the debug adaptor using a fifo file in the +// The launcher will communicate with the debug adapter using a fifo file in the // directory specified in the --comm-file argument. // -// Regarding the actual flow, this launcher will first notify the debug adaptor +// Regarding the actual flow, this launcher will first notify the debug adapter // of its pid. Then, the launcher will be in a pending state waiting to be -// attached by the adaptor. +// attached by the adapter. // // Once attached and resumed, the launcher will exec and become the program // specified by --launch-target, which is the original target the // user wanted to run. // // In case of errors launching the target, a suitable error message will be -// emitted to the debug adaptor. +// emitted to the debug adapter. static llvm::Error LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg, llvm::StringRef comm_file, lldb::pid_t debugger_pid, @@ -219,7 +219,7 @@ static llvm::Error LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg, const char *timeout_env_var = getenv("LLDB_DAP_RIT_TIMEOUT_IN_MS"); int timeout_in_ms = timeout_env_var != nullptr ? 
atoi(timeout_env_var) : 20000; - if (llvm::Error err = comm_channel.WaitUntilDebugAdaptorAttaches( + if (llvm::Error err = comm_channel.WaitUntilDebugAdapterAttaches( std::chrono::milliseconds(timeout_in_ms))) { return err; } diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index 31d808eda4c35..cd450a614b3f7 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -88,6 +88,12 @@ "additionalProperties": { "type": "string" } + }, + "lldb-dap.serverMode": { + "scope": "resource", + "type": "boolean", + "markdownDescription": "Run lldb-dap in server mode.\n\nWhen enabled, lldb-dap will start a background server that will be reused between debug sessions. This allows caching of debug symbols between sessions and improves launch performance.", + "default": false } } }, @@ -543,4 +549,4 @@ } ] } -} +} \ No newline at end of file diff --git a/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts b/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts index 36107336ebc4d..1f76fe31b00ad 100644 --- a/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts +++ b/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts @@ -4,6 +4,8 @@ import * as vscode from "vscode"; import * as child_process from "child_process"; import * as fs from "node:fs/promises"; +const exec = util.promisify(child_process.execFile); + export async function isExecutable(path: string): Promise { try { await fs.access(path, fs.constants.X_OK); @@ -16,7 +18,6 @@ export async function isExecutable(path: string): Promise { async function findWithXcrun(executable: string): Promise { if (process.platform === "darwin") { try { - const exec = util.promisify(child_process.execFile); let { stdout, stderr } = await exec("/usr/bin/xcrun", [ "-find", executable, @@ -24,7 +25,7 @@ async function findWithXcrun(executable: string): Promise { if (stdout) { return stdout.toString().trimEnd(); } - } catch (error) {} + } catch (error) { } } return undefined; } @@ -97,8 
+98,15 @@ async function getDAPExecutable( * depending on the session configuration. */ export class LLDBDapDescriptorFactory - implements vscode.DebugAdapterDescriptorFactory -{ + implements vscode.DebugAdapterDescriptorFactory, vscode.Disposable { + private server?: Promise<{ process: child_process.ChildProcess, host: string, port: number }>; + + dispose() { + this.server?.then(({ process }) => { + process.kill(); + }); + } + async createDebugAdapterDescriptor( session: vscode.DebugSession, executable: vscode.DebugAdapterExecutable | undefined, @@ -115,7 +123,18 @@ export class LLDBDapDescriptorFactory } const configEnvironment = config.get<{ [key: string]: string }>("environment") || {}; - const dapPath = await getDAPExecutable(session); + const dapPath = (await getDAPExecutable(session)) ?? executable?.command; + + if (!dapPath) { + LLDBDapDescriptorFactory.showLLDBDapNotFoundMessage(); + return undefined; + } + + if (!(await isExecutable(dapPath))) { + LLDBDapDescriptorFactory.showLLDBDapNotFoundMessage(dapPath); + return; + } + const dbgOptions = { env: { ...executable?.options?.env, @@ -123,33 +142,52 @@ export class LLDBDapDescriptorFactory ...env, }, }; - if (dapPath) { - if (!(await isExecutable(dapPath))) { - LLDBDapDescriptorFactory.showLLDBDapNotFoundMessage(dapPath); - return undefined; - } - return new vscode.DebugAdapterExecutable(dapPath, [], dbgOptions); - } else if (executable) { - if (!(await isExecutable(executable.command))) { - LLDBDapDescriptorFactory.showLLDBDapNotFoundMessage(executable.command); - return undefined; - } - return new vscode.DebugAdapterExecutable( - executable.command, - executable.args, - dbgOptions, - ); + const dbgArgs = executable?.args ?? 
[]; + + const serverMode = config.get('serverMode', false); + if (serverMode) { + const { host, port } = await this.startServer(dapPath, dbgArgs, dbgOptions); + return new vscode.DebugAdapterServer(port, host); } - return undefined; + + return new vscode.DebugAdapterExecutable(dapPath, dbgArgs, dbgOptions); + } + + startServer(dapPath: string, args: string[], options: child_process.CommonSpawnOptions): Promise<{ host: string, port: number }> { + if (this.server) return this.server; + + this.server = new Promise(resolve => { + args.push( + '--connection', + 'connect://localhost:0' + ); + const server = child_process.spawn(dapPath, args, options); + server.stdout!.setEncoding('utf8').once('data', (data: string) => { + const connection = /connection:\/\/\[([^\]]+)\]:(\d+)/.exec(data); + if (connection) { + const host = connection[1]; + const port = Number(connection[2]); + resolve({ process: server, host, port }); + } + }); + server.on('exit', () => { + this.server = undefined; + }) + }); + return this.server; } /** * Shows a message box when the debug adapter's path is not found */ - static async showLLDBDapNotFoundMessage(path: string) { + static async showLLDBDapNotFoundMessage(path?: string) { + const message = + path + ? 
`Debug adapter path: ${path} is not a valid file.` + : "Unable to find the path to the LLDB debug adapter executable."; const openSettingsAction = "Open Settings"; const callbackValue = await vscode.window.showErrorMessage( - `Debug adapter path: ${path} is not a valid file`, + message, openSettingsAction, ); diff --git a/lldb/tools/lldb-dap/src-ts/extension.ts b/lldb/tools/lldb-dap/src-ts/extension.ts index 71fd48298f8f5..a07bcdebcb68b 100644 --- a/lldb/tools/lldb-dap/src-ts/extension.ts +++ b/lldb/tools/lldb-dap/src-ts/extension.ts @@ -1,5 +1,3 @@ -import * as path from "path"; -import * as util from "util"; import * as vscode from "vscode"; import { @@ -15,13 +13,14 @@ import { DisposableContext } from "./disposable-context"; export class LLDBDapExtension extends DisposableContext { constructor() { super(); + const factory = new LLDBDapDescriptorFactory(); + this.pushSubscription(factory); this.pushSubscription( vscode.debug.registerDebugAdapterDescriptorFactory( "lldb-dap", - new LLDBDapDescriptorFactory(), - ), + factory, + ) ); - this.pushSubscription( vscode.workspace.onDidChangeConfiguration(async (event) => { if (event.affectsConfiguration("lldb-dap.executable-path")) { diff --git a/lldb/utils/lui/sourcewin.py b/lldb/utils/lui/sourcewin.py index efe3c01f3ebd1..a72dad5985ecd 100644 --- a/lldb/utils/lui/sourcewin.py +++ b/lldb/utils/lui/sourcewin.py @@ -210,7 +210,7 @@ def getLocations(event): # inlined frames, so we get the description (which does take # into account inlined functions) and parse it. 
desc = lldbutil.get_description(location, lldb.eDescriptionLevelFull) - match = re.search("at\ ([^:]+):([\d]+)", desc) + match = re.search(r"at\ ([^:]+):([\d]+)", desc) try: path = match.group(1) line = int(match.group(2).strip()) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index f21594c557e0e..4390b45f1f730 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -203,6 +203,15 @@ bool getShuffleDemandedElts(int SrcWidth, ArrayRef Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts = false); +/// Does this shuffle mask represent either one slide shuffle or a pair of +/// two slide shuffles, combined with a select on some constant vector mask? +/// A slide is a shuffle mask which shifts some set of elements up or down +/// the vector, with all other elements being undefined. An identity shuffle +/// will be matched as a slide by 0. The output parameter provides the source +/// (-1 means no source), and slide direction for each slide. +bool isMaskedSlidePair(ArrayRef Mask, int NumElts, + std::array, 2> &SrcInfo); + /// Replace each shuffle mask index with the scaled sequential indices for an /// equivalent mask of narrowed elements. Mask elements that are less than 0 /// (sentinel values) are repeated in the output mask. 
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index d679409770ca1..563953516a354 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2056,12 +2056,33 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } case Intrinsic::experimental_vector_match: return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind); - case Intrinsic::sincos: { + case Intrinsic::modf: + case Intrinsic::sincos: + case Intrinsic::sincospi: { Type *Ty = getContainedTypes(RetTy).front(); EVT VT = getTLI()->getValueType(DL, Ty); - RTLIB::Libcall LC = RTLIB::getSINCOS(VT.getScalarType()); - if (auto Cost = - getMultipleResultIntrinsicVectorLibCallCost(ICA, CostKind, LC)) + + RTLIB::Libcall LC = [&] { + switch (ICA.getID()) { + case Intrinsic::modf: + return RTLIB::getMODF; + case Intrinsic::sincos: + return RTLIB::getSINCOS; + case Intrinsic::sincospi: + return RTLIB::getSINCOSPI; + default: + llvm_unreachable("unexpected intrinsic"); + } + }()(VT.getScalarType()); + + std::optional CallRetElementIndex; + // The first element of the modf result is returned by value in the + // libcall. + if (ICA.getID() == Intrinsic::modf) + CallRetElementIndex = 0; + + if (auto Cost = getMultipleResultIntrinsicVectorLibCallCost( + ICA, CostKind, LC, CallRetElementIndex)) return *Cost; // Otherwise, fallback to default scalarization cost. break; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index cecb4094c9a57..fbbd0d3d74b15 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -1580,18 +1580,36 @@ class ExecutionSession { return EPC->getBootstrapSymbols(Pairs); } - /// Run a wrapper function in the executor. + /// Run a wrapper function in the executor. The given WFRHandler will be + /// called on the result when it is returned. 
/// /// The wrapper function should be callable as: /// /// \code{.cpp} /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); /// \endcode{.cpp} - /// - /// The given OnComplete function will be called to return the result. - template - void callWrapperAsync(ArgTs &&... Args) { - EPC->callWrapperAsync(std::forward(Args)...); + void callWrapperAsync(ExecutorAddr WrapperFnAddr, + ExecutorProcessControl::IncomingWFRHandler OnComplete, + ArrayRef ArgBuffer) { + EPC->callWrapperAsync(WrapperFnAddr, std::move(OnComplete), ArgBuffer); + } + + /// Run a wrapper function in the executor using the given Runner to dispatch + /// OnComplete when the result is ready. + template + void callWrapperAsync(RunPolicyT &&Runner, ExecutorAddr WrapperFnAddr, + FnT &&OnComplete, ArrayRef ArgBuffer) { + EPC->callWrapperAsync(std::forward(Runner), WrapperFnAddr, + std::forward(OnComplete), ArgBuffer); + } + + /// Run a wrapper function in the executor. OnComplete will be dispatched + /// as a GenericNamedTask using this instance's TaskDispatch object. + template + void callWrapperAsync(ExecutorAddr WrapperFnAddr, FnT &&OnComplete, + ArrayRef ArgBuffer) { + EPC->callWrapperAsync(WrapperFnAddr, std::forward(OnComplete), + ArgBuffer); } /// Run a wrapper function in the executor. The wrapper function should be diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h index a501eaf2356ff..26163fdb4b63d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h @@ -188,7 +188,7 @@ bool isVariantApplicableInContext(const VariantMatchInfo &VMI, bool DeviceSetOnly = false); /// Return the index (into \p VMIs) of the variant with the highest score -/// from the ones applicble in \p Ctx. See llvm::isVariantApplicableInContext. +/// from the ones applicable in \p Ctx. See llvm::isVariantApplicableInContext. 
int getBestVariantMatchForContext(const SmallVectorImpl &VMIs, const OMPContext &Ctx); diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 876a6f816ad3f..3118ded81d4c9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2529,13 +2529,13 @@ def int_amdgcn_set_inactive_chain_arg : // Return if the given flat pointer points to a local memory address. def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">, DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>] + [IntrNoMem, IntrSpeculatable] // FIXME: This should be captures(ret: address) >; // Return if the given flat pointer points to a prvate memory address. def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">, DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>] + [IntrNoMem, IntrSpeculatable] // FIXME: This should be captures(ret: address) >; // A uniform tail call to a function with the `amdgpu_cs_chain` or diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index 3aa1d7864fcb7..57a6db6c4e5aa 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -41,10 +41,10 @@ struct VerdAux { struct VerDef { unsigned Offset; - unsigned Version; - unsigned Flags; - unsigned Ndx; - unsigned Cnt; + uint16_t Version; + uint16_t Flags; + uint16_t Ndx; + uint16_t Cnt; unsigned Hash; std::string Name; std::vector AuxV; @@ -1057,8 +1057,8 @@ ELFFile::getVersionDefinitions(const Elf_Shdr &Sec) const { VerdAux Aux; Aux.Offset = VerdauxBuf - Start; - if (Verdaux->vda_name <= StrTabOrErr->size()) - Aux.Name = std::string(StrTabOrErr->drop_front(Verdaux->vda_name)); + if (Verdaux->vda_name < StrTabOrErr->size()) + Aux.Name = std::string(StrTabOrErr->drop_front(Verdaux->vda_name).data()); else Aux.Name = ("vda_name) + ">").str(); return Aux; diff --git 
a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 995ee54a73ce4..05951f87b5062 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1208,7 +1208,8 @@ struct InformationCache { TargetTriple(M.getTargetTriple()) { if (UseExplorer) Explorer = new (Allocator) MustBeExecutedContextExplorer( - /* ExploreInterBlock */ true, /* ExploreCFGForward */ true, + /* ExploreInterBlock */ + true, /* ExploreCFGForward */ true, /* ExploreCFGBackward */ true, /* LIGetter */ [&](const Function &F) { return AG.getAnalysis(F); }, diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 7d11fc0ad6938..84292c716a0a9 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -94,6 +94,10 @@ class JumpThreadingPass : public PassInfoMixin { SmallPtrSet LoopHeaders; #endif + // JumpThreading must not process blocks unreachable from entry. It's a + // waste of compute time and can potentially lead to hangs. + SmallPtrSet Unreachable; + unsigned BBDupThreshold; unsigned DefaultBBDupThreshold; diff --git a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h index f789c3af75c68..810fef29f4010 100644 --- a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h +++ b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h @@ -110,7 +110,9 @@ struct ControlFlowHub { Branches.emplace_back(BB, Succ0, Succ1); } - BasicBlock * + /// Return the unified loop exit block and a flag indicating if the CFG was + /// changed at all. 
+ std::pair finalize(DomTreeUpdater *DTU, SmallVectorImpl &GuardBlocks, const StringRef Prefix, std::optional MaxControlFlowBooleans = std::nullopt); diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h index 7ea9386f08bee..fea53329719b9 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h @@ -37,6 +37,9 @@ class SandboxVectorizerPass : public PassInfoMixin { // within FPM may register/unregister callbacks, so they need access to // Context. sandboxir::FunctionPassManager FPM; + /// \Returns true if we should attempt to vectorize \p SrcFilePath based on + /// `AllowFiles` option. + bool allowFile(const std::string &SrcFilePath); bool runImpl(Function &F); diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp index 6e5748c233240..98f68d322287f 100644 --- a/llvm/lib/Analysis/CaptureTracking.cpp +++ b/llvm/lib/Analysis/CaptureTracking.cpp @@ -279,9 +279,9 @@ UseCaptureInfo llvm::DetermineUseCaptureKind( case Instruction::Invoke: { auto *Call = cast(I); // Not captured if the callee is readonly, doesn't return a copy through - // its return value and doesn't unwind (a readonly function can leak bits - // by throwing an exception or not depending on the input value). - if (Call->onlyReadsMemory() && Call->doesNotThrow() && + // its return value and doesn't unwind or diverge (a readonly function can + // leak bits by throwing an exception or not depending on the input value). 
+ if (Call->onlyReadsMemory() && Call->doesNotThrow() && Call->willReturn() && Call->getType()->isVoidTy()) return CaptureComponents::None; diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index dcfd3d5a8bd6e..53150684b4e4a 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -73,6 +73,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::sin: case Intrinsic::cos: case Intrinsic::sincos: + case Intrinsic::sincospi: case Intrinsic::tan: case Intrinsic::sinh: case Intrinsic::cosh: @@ -88,6 +89,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::maxnum: case Intrinsic::minimum: case Intrinsic::maximum: + case Intrinsic::modf: case Intrinsic::copysign: case Intrinsic::floor: case Intrinsic::ceil: @@ -186,7 +188,9 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg( case Intrinsic::ucmp: case Intrinsic::scmp: return OpdIdx == -1 || OpdIdx == 0; + case Intrinsic::modf: case Intrinsic::sincos: + case Intrinsic::sincospi: case Intrinsic::is_fpclass: case Intrinsic::vp_is_fpclass: return OpdIdx == 0; @@ -415,6 +419,36 @@ bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef Mask, return true; } +bool llvm::isMaskedSlidePair(ArrayRef Mask, int NumElts, + std::array, 2> &SrcInfo) { + const int SignalValue = NumElts * 2; + SrcInfo[0] = {-1, SignalValue}; + SrcInfo[1] = {-1, SignalValue}; + for (auto [i, M] : enumerate(Mask)) { + if (M < 0) + continue; + int Src = M >= (int)NumElts; + int Diff = (int)i - (M % NumElts); + bool Match = false; + for (int j = 0; j < 2; j++) { + auto &[SrcE, DiffE] = SrcInfo[j]; + if (SrcE == -1) { + assert(DiffE == SignalValue); + SrcE = Src; + DiffE = Diff; + } + if (SrcE == Src && DiffE == Diff) { + Match = true; + break; + } + } + if (!Match) + return false; + } + // Avoid all undef masks + return SrcInfo[0].first != -1; +} + void llvm::narrowShuffleMaskElts(int Scale, ArrayRef Mask, SmallVectorImpl &ScaledMask) { assert(Scale 
> 0 && "Unexpected scaling factor"); diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index dbc724629d3be..8d91e7119d0ba 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -600,12 +600,12 @@ class MemLocFragmentFill { break; } - auto CurrentLiveInEntry = LiveIn.find(&BB); // If there's no LiveIn entry for the block yet, add it. - if (CurrentLiveInEntry == LiveIn.end()) { + auto [CurrentLiveInEntry, Inserted] = LiveIn.try_emplace(&BB); + if (Inserted) { LLVM_DEBUG(dbgs() << "change=true (first) on meet on " << BB.getName() << "\n"); - LiveIn[&BB] = std::move(BBLiveIn); + CurrentLiveInEntry->second = std::move(BBLiveIn); return /*Changed=*/true; } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 5860a76c66bff..471666568e79a 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -63,6 +63,10 @@ using namespace llvm; +static cl::opt + PrintMIAddrs("print-mi-addrs", cl::Hidden, + cl::desc("Print addresses of MachineInstrs when dumping")); + static const MachineFunction *getMFIfAvailable(const MachineInstr &MI) { if (const MachineBasicBlock *MBB = MI.getParent()) if (const MachineFunction *MF = MBB->getParent()) @@ -2076,6 +2080,9 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } // TODO: DBG_LABEL + if (PrintMIAddrs) + OS << " ; " << this; + if (AddNewLine) OS << '\n'; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f56097fdbb51a..2b8818482a333 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2913,7 +2913,9 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { } SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { - MVT VecVT = Node->getOperand(1).getSimpleValueType(); + bool IsVPOpcode 
= ISD::isVPOpcode(Node->getOpcode()); + MVT VecVT = IsVPOpcode ? Node->getOperand(1).getSimpleValueType() + : Node->getOperand(0).getSimpleValueType(); MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT); MVT ScalarVT = Node->getSimpleValueType(0); MVT NewScalarVT = NewVecVT.getVectorElementType(); @@ -2921,16 +2923,13 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { SDLoc DL(Node); SmallVector Operands(Node->getNumOperands()); - // promote the initial value. // FIXME: Support integer. assert(Node->getOperand(0).getValueType().isFloatingPoint() && "Only FP promotion is supported"); - Operands[0] = - DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0)); - for (unsigned j = 1; j != Node->getNumOperands(); ++j) + for (unsigned j = 0; j != Node->getNumOperands(); ++j) if (Node->getOperand(j).getValueType().isVector() && - !(ISD::isVPOpcode(Node->getOpcode()) && + !(IsVPOpcode && ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand. // promote the vector operand. // FIXME: Support integer. @@ -2938,6 +2937,10 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { "Only FP promotion is supported"); Operands[j] = DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j)); + } else if (Node->getOperand(j).getValueType().isFloatingPoint()) { + // promote the initial value. + Operands[j] = + DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j)); } else { Operands[j] = Node->getOperand(j); // Skip VL operand. 
} @@ -5049,7 +5052,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::SINT_TO_FP || Node->getOpcode() == ISD::SETCC || Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT || - Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { + Node->getOpcode() == ISD::INSERT_VECTOR_ELT || + Node->getOpcode() == ISD::VECREDUCE_FMAX || + Node->getOpcode() == ISD::VECREDUCE_FMIN || + Node->getOpcode() == ISD::VECREDUCE_FMAXIMUM || + Node->getOpcode() == ISD::VECREDUCE_FMINIMUM) { OVT = Node->getOperand(0).getSimpleValueType(); } if (Node->getOpcode() == ISD::ATOMIC_STORE || @@ -5796,6 +5803,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); break; } + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + case ISD::VECREDUCE_FMAXIMUM: + case ISD::VECREDUCE_FMINIMUM: case ISD::VP_REDUCE_FMAX: case ISD::VP_REDUCE_FMIN: case ISD::VP_REDUCE_FMAXIMUM: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index de4447fb0cf1a..27bde7b96c857 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -503,13 +503,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: + case ISD::VECTOR_FIND_LAST_ACTIVE: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: case ISD::VECREDUCE_FMAXIMUM: case ISD::VECREDUCE_FMINIMUM: - case ISD::VECTOR_FIND_LAST_ACTIVE: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); + // Defer non-vector results to LegalizeDAG. 
+ if (Action == TargetLowering::Promote) + Action = TargetLowering::Legal; break; case ISD::VECREDUCE_SEQ_FADD: case ISD::VECREDUCE_SEQ_FMUL: diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp index 8067e2a173a00..8ce7e74d67cde 100644 --- a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp @@ -324,11 +324,18 @@ Error lowerPointer64AuthEdgesToSigningFunction(LinkGraph &G) { uint64_t EncodedInfo = E.getAddend(); int32_t RealAddend = (uint32_t)(EncodedInfo & 0xffffffff); + auto ValueToSign = E.getTarget().getAddress() + RealAddend; + if (!ValueToSign) { + LLVM_DEBUG(dbgs() << " " << B->getFixupAddress(E) << " <- null\n"); + E.setAddend(RealAddend); + E.setKind(aarch64::Pointer64); + continue; + } + uint32_t InitialDiscriminator = (EncodedInfo >> 32) & 0xffff; bool AddressDiversify = (EncodedInfo >> 48) & 0x1; uint32_t Key = (EncodedInfo >> 49) & 0x3; uint32_t HighBits = EncodedInfo >> 51; - auto ValueToSign = E.getTarget().getAddress() + RealAddend; if (HighBits != 0x1000) return make_error( diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 944fca000d61f..1989d8ca101e1 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -98,9 +98,9 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, std::lock_guard Lock(Mutex); // This is the maximum range whose permission have been possibly modified - Allocations[MinAddr].Size = MaxAddr - MinAddr; - Allocations[MinAddr].DeinitializationActions = - std::move(*DeinitializeActions); + auto &Alloc = Allocations[MinAddr]; + Alloc.Size = MaxAddr - MinAddr; + Alloc.DeinitializationActions = std::move(*DeinitializeActions); Reservations[AI.MappingBase.toPtr()].Allocations.push_back(MinAddr); } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 11e5a9cd33260..a52c4d88ac836 100644 
--- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -88,6 +88,14 @@ using namespace llvm; +static cl::opt + PrintInstAddrs("print-inst-addrs", cl::Hidden, + cl::desc("Print addresses of instructions when dumping")); + +static cl::opt PrintInstDebugLocs( + "print-inst-debug-locs", cl::Hidden, + cl::desc("Pretty print debug locations of instructions when dumping")); + // Make virtual table appear in this compilation unit. AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default; @@ -4256,6 +4264,18 @@ void AssemblyWriter::printInfoComment(const Value &V) { if (AnnotationWriter) { AnnotationWriter->printInfoComment(V, Out); } + + if (PrintInstDebugLocs) { + if (auto *I = dyn_cast(&V)) { + if (I->getDebugLoc()) { + Out << " ; "; + I->getDebugLoc().print(Out); + } + } + } + + if (PrintInstAddrs) + Out << " ; " << &V; } static void maybePrintCallAddrSpace(const Value *Operand, const Instruction *I, diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index 2cb1908695308..2bac99b6309af 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -634,16 +634,14 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI, bool IsVariadic = MCDesc.isVariadic(); if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) { auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID); - Descriptors[DKey] = std::move(ID); - return *Descriptors[DKey]; + return *(Descriptors[DKey] = std::move(ID)); } auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID); assert( !VariantDescriptors.contains(VDKey) && "Expected VariantDescriptors to not already have a value for this key."); - VariantDescriptors[VDKey] = std::move(ID); - return *VariantDescriptors[VDKey]; + return *(VariantDescriptors[VDKey] = std::move(ID)); } Expected diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index f112ea2efcaa9..18aa76c865bc8 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ 
b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -230,7 +230,8 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other, auto Name = Other.Name; auto Hash = Other.Hash; Other.accumulateCounts(FuncLevelOverlap.Test); - if (!FunctionData.contains(Name)) { + auto It = FunctionData.find(Name); + if (It == FunctionData.end()) { Overlap.addOneUnique(FuncLevelOverlap.Test); return; } @@ -238,7 +239,7 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other, Overlap.Overlap.NumEntries += 1; return; } - auto &ProfileDataMap = FunctionData[Name]; + auto &ProfileDataMap = It->second; bool NewFunc; ProfilingData::iterator Where; std::tie(Where, NewFunc) = diff --git a/llvm/lib/SandboxIR/Region.cpp b/llvm/lib/SandboxIR/Region.cpp index 086993e6dc872..2eb84bd72ed00 100644 --- a/llvm/lib/SandboxIR/Region.cpp +++ b/llvm/lib/SandboxIR/Region.cpp @@ -64,7 +64,7 @@ void Region::setAux(ArrayRef Aux) { auto &LLVMCtx = Ctx.LLVMCtx; for (auto [Idx, I] : enumerate(Aux)) { llvm::ConstantInt *IdxC = - llvm::ConstantInt::get(LLVMCtx, llvm::APInt(32, Idx, false)); + llvm::ConstantInt::get(llvm::Type::getInt32Ty(LLVMCtx), Idx, false); assert(cast(I->Val)->getMetadata(AuxMDKind) == nullptr && "Instruction already in Aux!"); cast(I->Val)->setMetadata( diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 35b222e2d55eb..7a471662ea075 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1701,6 +1701,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_ROUND, VT, Custom); setOperationAction(ISD::MLOAD, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); 
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 501e659eb1813..8255b267bd7e9 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1962,6 +1962,8 @@ multiclass sve_int_sel_vvv { def : SVE_3_Op_Pat(NAME # _D)>; def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; def : InstAlias<"mov $Zd, $Pg/m, $Zn", (!cast(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index e46d0587e7943..76b1775f0d096 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -151,23 +151,18 @@ PreservedAnalyses AMDGPUAtomicOptimizerPass::run(Function &F, } bool AMDGPUAtomicOptimizerImpl::run() { - // Scan option None disables the Pass - if (ScanImpl == ScanOptions::None) { + if (ScanImpl == ScanOptions::None) return false; - } visit(F); + if (ToReplace.empty()) + return false; - const bool Changed = !ToReplace.empty(); - - for (ReplacementInfo &Info : ToReplace) { - optimizeAtomic(*Info.I, Info.Op, Info.ValIdx, Info.ValDivergent); - } - + for (auto &[I, Op, ValIdx, ValDivergent] : ToReplace) + optimizeAtomic(*I, Op, ValIdx, ValDivergent); ToReplace.clear(); - - return Changed; + return true; } static bool isLegalCrossLaneType(Type *Ty) { @@ -247,9 +242,7 @@ void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) { // If we get here, we can optimize the atomic using a single wavefront-wide // atomic operation to do the calculation for the entire wavefront, so // remember the instruction so we can come back to it. 
- const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent}; - - ToReplace.push_back(Info); + ToReplace.push_back({&I, Op, ValIdx, ValDivergent}); } void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) { @@ -333,17 +326,14 @@ void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) { // If any of the other arguments to the intrinsic are divergent, we can't // optimize the operation. for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) { - if (UA.isDivergentUse(I.getOperandUse(Idx))) { + if (UA.isDivergentUse(I.getOperandUse(Idx))) return; - } } // If we get here, we can optimize the atomic using a single wavefront-wide // atomic operation to do the calculation for the entire wavefront, so // remember the instruction so we can come back to it. - const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent}; - - ToReplace.push_back(Info); + ToReplace.push_back({&I, Op, ValIdx, ValDivergent}); } // Use the builder to create the non-atomic counterpart of the specified diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index cb918e16f0f3b..d69cfbbe4088e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -1569,6 +1569,10 @@ std::optional GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic( std::function SimplifyAndSetOp) const { switch (II.getIntrinsicID()) { + case Intrinsic::amdgcn_readfirstlane: + // TODO: For a vector extract, should reduce the intrinsic call type. 
+ SimplifyAndSetOp(&II, 0, DemandedElts, UndefElts); + return std::nullopt; case Intrinsic::amdgcn_raw_buffer_load: case Intrinsic::amdgcn_raw_ptr_buffer_load: case Intrinsic::amdgcn_raw_buffer_load_format: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index f293b3aba7b79..33018ae9677a3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -314,18 +314,20 @@ RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const { Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS || Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) { unsigned IntrID = cast(MI).getIntrinsicID(); - if (!IRulesAlias.contains(IntrID)) { + auto IRAIt = IRulesAlias.find(IntrID); + if (IRAIt == IRulesAlias.end()) { LLVM_DEBUG(dbgs() << "MI: "; MI.dump();); llvm_unreachable("No rules defined for intrinsic opcode"); } - return IRules.at(IRulesAlias.at(IntrID)); + return IRules.at(IRAIt->second); } - if (!GRulesAlias.contains(Opc)) { + auto GRAIt = GRulesAlias.find(Opc); + if (GRAIt == GRulesAlias.end()) { LLVM_DEBUG(dbgs() << "MI: "; MI.dump();); llvm_unreachable("No rules defined for generic opcode"); } - return GRules.at(GRulesAlias.at(Opc)); + return GRules.at(GRAIt->second); } // Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'. 
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 1ff75095b220a..582da42a0dc4e 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -111,25 +111,6 @@ static bool isSMovRel(unsigned Opcode) { } } -static bool isDGEMM(unsigned Opcode) { - return AMDGPU::getMAIIsDGEMM(Opcode); -} - -static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) { - unsigned Opcode = MI.getOpcode(); - - if (!SIInstrInfo::isMAI(MI) || - isDGEMM(Opcode) || - Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 || - Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64) - return false; - - if (!ST.hasGFX940Insts()) - return true; - - return AMDGPU::getMAIIsGFX940XDL(Opcode); -} - static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, const MachineInstr &MI) { if (TII.isAlwaysGDS(MI.getOpcode())) @@ -2375,7 +2356,8 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { unsigned Opc1 = MI1->getOpcode(); int NeedWaitStates = 0; if (OpNo == SrcCIdx) { - if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) { + if (!SIInstrInfo::isDGEMM(Opc) && + (!ST.hasGFX940Insts() && SIInstrInfo::isDGEMM(Opc1))) { NeedWaitStates = 0; } else if (FullReg) { if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || @@ -2392,7 +2374,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64: - if (!isXDL(ST, *MI)) + if (!TII.isXDL(*MI)) NeedWaitStates = ST.hasGFX950Insts() ? 
GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates @@ -2400,18 +2382,18 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { break; case AMDGPU::V_MFMA_F64_4X4X4F64_e64: case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64: - if (!isXDL(ST, *MI)) + if (!TII.isXDL(*MI)) NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates; break; default: int NumPasses = TSchedModel.computeInstrLatency(MI1); if (ST.hasGFX940Insts()) { - if (isXDL(ST, *MI) && !isXDL(ST, *MI1)) + if (TII.isXDL(*MI) && !TII.isXDL(*MI1)) break; NeedWaitStates = - isXDL(ST, *MI1) - ? (isXDL(ST, *MI) + TII.isXDL(*MI1) + ? (TII.isXDL(*MI) ? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates( NumPasses, ST.hasGFX950Insts()) : GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates( @@ -2424,18 +2406,19 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { switch (NumPasses) { case 2: NeedWaitStates = - isDGEMM(Opc) ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; + SIInstrInfo::isDGEMM(Opc) + ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; break; case 8: NeedWaitStates = - isDGEMM(Opc) + SIInstrInfo::isDGEMM(Opc) ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; break; case 16: NeedWaitStates = - isDGEMM(Opc) + SIInstrInfo::isDGEMM(Opc) ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; break; @@ -2464,7 +2447,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { if (ST.hasGFX940Insts()) { NeedWaitStates = - isXDL(ST, *MI1) + TII.isXDL(*MI1) ? 
GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates( NumPasses, ST.hasGFX950Insts()) : GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates( @@ -2631,7 +2614,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { return 0; auto IsDGEMMFn = [](const MachineInstr &MI) -> bool { - return isDGEMM(MI.getOpcode()); + return SIInstrInfo::isDGEMM(MI.getOpcode()); }; // This is checked in checkMAIHazards90A() @@ -2670,7 +2653,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { bool DGEMMAfterVALUWrite = false; auto IsDGEMMHazard = [&DGEMMAfterVALUWrite, this](const MachineInstr &MI) { // Found DGEMM on reverse traversal to def. - if (isDGEMM(MI.getOpcode())) + if (SIInstrInfo::isDGEMM(MI.getOpcode())) DGEMMAfterVALUWrite = true; // Only hazard if register is defined by a VALU and a DGEMM is found after @@ -2745,7 +2728,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { int NumPasses = HazardDefLatency; int NeedWaitStates = MaxWaitStates; - if (isDGEMM(MFMA->getOpcode())) { + if (SIInstrInfo::isDGEMM(MFMA->getOpcode())) { switch (HazardDefLatency) { case 4: NeedWaitStates = IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates @@ -2765,7 +2748,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { } } else if (ST.hasGFX940Insts()) { NeedWaitStates = - isXDL(ST, *MFMA) + TII.isXDL(*MFMA) ? 
GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates( NumPasses, ST.hasGFX950Insts()) : GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates( @@ -2838,7 +2821,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { int NeedWaitStates = MaxWaitStates; int NumPasses = TSchedModel.computeInstrLatency(MFMA); - if (isDGEMM(MFMA->getOpcode())) { + if (SIInstrInfo::isDGEMM(MFMA->getOpcode())) { switch (NumPasses) { case 4: NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates; @@ -2852,7 +2835,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { } } else if (ST.hasGFX940Insts()) { NeedWaitStates = - isXDL(ST, *MFMA) + TII.isXDL(*MFMA) ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates( NumPasses, ST.hasGFX950Insts()) : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses); @@ -2880,11 +2863,11 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { } auto IsSMFMAReadAsCFn = [&Reg, &MFMA, this](const MachineInstr &MI) { - if (!SIInstrInfo::isMFMA(MI) || isDGEMM(MI.getOpcode()) || + if (!SIInstrInfo::isMFMA(MI) || SIInstrInfo::isDGEMM(MI.getOpcode()) || !MI.readsRegister(Reg, &TRI)) return false; - if (ST.hasGFX940Insts() && !isXDL(ST, MI)) + if (ST.hasGFX940Insts() && !TII.isXDL(MI)) return false; const MachineOperand *SrcC = diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9aec2bef0c18a..0face8108d249 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -10251,3 +10251,17 @@ bool SIInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const { return TargetInstrInfo::isGlobalMemoryObject(MI); } + +bool SIInstrInfo::isXDL(const MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + + if (!SIInstrInfo::isMAI(MI) || isDGEMM(Opcode) || + Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 || + Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64) + return false; + + if (!ST.hasGFX940Insts()) + return true; + + return AMDGPU::getMAIIsGFX940XDL(Opcode); +} 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 428322a5a2f04..88b1e477f13e4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -847,6 +847,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return get(Opcode).TSFlags & SIInstrFlags::IsDOT; } + bool isXDL(const MachineInstr &MI) const; + + static bool isDGEMM(unsigned Opcode) { return AMDGPU::getMAIIsDGEMM(Opcode); } + static bool isLDSDIR(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR; } diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 920c3e11e4718..745e4086bc7fe 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -632,6 +632,8 @@ bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence( TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::clamp); + TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::op_sel); + // The kill flags may no longer be correct. 
if (Src0->isReg()) MRI->clearKillFlags(Src0->getReg()); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 9bf346b916f8f..8a5cdd7412bf3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1078,8 +1078,6 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) { return tryLDGLDU(N); } - unsigned int PointerSize = - CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace()); SDLoc DL(N); SDValue Chain = N->getOperand(0); @@ -1112,37 +1110,24 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { FromType = getLdStRegType(ScalarVT); // Create the machine instruction DAG - SDValue N1 = N->getOperand(1); SDValue Offset, Base; - std::optional Opcode; - MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; - - SmallVector Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL), - getI32Imm(CodeAddrSpace, DL), - getI32Imm(VecType, DL), getI32Imm(FromType, DL), - getI32Imm(FromTypeWidth, DL)}); - - if (SelectADDRsi(N1.getNode(), N1, Base, Offset)) { - Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi, - NVPTX::LD_i32_asi, NVPTX::LD_i64_asi, - NVPTX::LD_f32_asi, NVPTX::LD_f64_asi); - } else { - if (PointerSize == 64) { - SelectADDRri64(N1.getNode(), N1, Base, Offset); - Opcode = - pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64, - NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, - NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64); - } else { - SelectADDRri(N1.getNode(), N1, Base, Offset); - Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, - NVPTX::LD_i32_ari, NVPTX::LD_i64_ari, - NVPTX::LD_f32_ari, NVPTX::LD_f64_ari); - } - } + SelectADDR(N->getOperand(1), Base, Offset); + SDValue Ops[] = {getI32Imm(Ordering, DL), + getI32Imm(Scope, DL), + getI32Imm(CodeAddrSpace, DL), + getI32Imm(VecType, DL), + getI32Imm(FromType, DL), + getI32Imm(FromTypeWidth, DL), 
+ Base, + Offset, + Chain}; + + const MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; + const std::optional Opcode = + pickOpcodeForVT(TargetVT, NVPTX::LD_i8, NVPTX::LD_i16, NVPTX::LD_i32, + NVPTX::LD_i64, NVPTX::LD_f32, NVPTX::LD_f64); if (!Opcode) return false; - Ops.append({Base, Offset, Chain}); SDNode *NVPTXLD = CurDAG->getMachineNode(*Opcode, DL, TargetVT, MVT::Other, Ops); @@ -1178,8 +1163,6 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) { return tryLDGLDU(N); } - unsigned int PointerSize = - CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace()); SDLoc DL(N); SDValue Chain = N->getOperand(0); @@ -1227,77 +1210,38 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { FromTypeWidth = 32; } - SDValue Op1 = N->getOperand(1); SDValue Offset, Base; - std::optional Opcode; - SDNode *LD; + SelectADDR(N->getOperand(1), Base, Offset); + SDValue Ops[] = {getI32Imm(Ordering, DL), + getI32Imm(Scope, DL), + getI32Imm(CodeAddrSpace, DL), + getI32Imm(VecType, DL), + getI32Imm(FromType, DL), + getI32Imm(FromTypeWidth, DL), + Base, + Offset, + Chain}; - SmallVector Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL), - getI32Imm(CodeAddrSpace, DL), - getI32Imm(VecType, DL), getI32Imm(FromType, DL), - getI32Imm(FromTypeWidth, DL)}); - - if (SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi, - NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi, - NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi); - break; - case NVPTXISD::LoadV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi, - NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, - std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt); - break; - } - } else { - if (PointerSize == 64) { - SelectADDRri64(Op1.getNode(), Op1, Base, Offset); 
- switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::LoadV2: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64, - NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64, - NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64); - break; - case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64, - NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt, - NVPTX::LDV_f32_v4_ari_64, std::nullopt); - break; - } - } else { - SelectADDRri(Op1.getNode(), Op1, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari, - NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari, - NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari); - break; - case NVPTXISD::LoadV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari, - NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, - std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt); - break; - } - } + std::optional Opcode; + switch (N->getOpcode()) { + default: + return false; + case NVPTXISD::LoadV2: + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2, + NVPTX::LDV_i16_v2, NVPTX::LDV_i32_v2, NVPTX::LDV_i64_v2, + NVPTX::LDV_f32_v2, NVPTX::LDV_f64_v2); + break; + case NVPTXISD::LoadV4: + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4, + NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4, std::nullopt, + NVPTX::LDV_f32_v4, std::nullopt); + break; } if (!Opcode) return false; - Ops.append({Base, Offset, Chain}); - LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops); + + SDNode *LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops); MachineMemOperand *MemRef = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(LD), {MemRef}); @@ -1344,177 +1288,60 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { SDVTList 
InstVTList = CurDAG->getVTList(InstVTs); SDValue Chain = N->getOperand(0); - std::optional Opcode; - SDLoc DL(N); - SDNode *LD; SDValue Base, Offset; + SelectADDR(Op1, Base, Offset); + SDValue Ops[] = {Base, Offset, Chain}; - if (SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { - switch (N->getOpcode()) { - default: - return false; - case ISD::LOAD: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8asi, - NVPTX::INT_PTX_LDG_GLOBAL_i16asi, NVPTX::INT_PTX_LDG_GLOBAL_i32asi, - NVPTX::INT_PTX_LDG_GLOBAL_i64asi, NVPTX::INT_PTX_LDG_GLOBAL_f32asi, - NVPTX::INT_PTX_LDG_GLOBAL_f64asi); - break; - case ISD::INTRINSIC_W_CHAIN: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8asi, - NVPTX::INT_PTX_LDU_GLOBAL_i16asi, NVPTX::INT_PTX_LDU_GLOBAL_i32asi, - NVPTX::INT_PTX_LDU_GLOBAL_i64asi, NVPTX::INT_PTX_LDU_GLOBAL_f32asi, - NVPTX::INT_PTX_LDU_GLOBAL_f64asi); - break; - case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v2i8_ELE_asi, - NVPTX::INT_PTX_LDG_G_v2i16_ELE_asi, - NVPTX::INT_PTX_LDG_G_v2i32_ELE_asi, - NVPTX::INT_PTX_LDG_G_v2i64_ELE_asi, - NVPTX::INT_PTX_LDG_G_v2f32_ELE_asi, - NVPTX::INT_PTX_LDG_G_v2f64_ELE_asi); - break; - case NVPTXISD::LDUV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v2i8_ELE_asi, - NVPTX::INT_PTX_LDU_G_v2i16_ELE_asi, - NVPTX::INT_PTX_LDU_G_v2i32_ELE_asi, - NVPTX::INT_PTX_LDU_G_v2i64_ELE_asi, - NVPTX::INT_PTX_LDU_G_v2f32_ELE_asi, - NVPTX::INT_PTX_LDU_G_v2f64_ELE_asi); - break; - case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_asi, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_asi, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_asi, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_asi, std::nullopt); - break; - case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_asi, - 
NVPTX::INT_PTX_LDU_G_v4i16_ELE_asi, - NVPTX::INT_PTX_LDU_G_v4i32_ELE_asi, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f32_ELE_asi, std::nullopt); - break; - } - } else { - if (TM.is64Bit()) { - SelectADDRri64(Op1.getNode(), Op1, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case ISD::LOAD: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_GLOBAL_i8ari64, - NVPTX::INT_PTX_LDG_GLOBAL_i16ari64, - NVPTX::INT_PTX_LDG_GLOBAL_i32ari64, - NVPTX::INT_PTX_LDG_GLOBAL_i64ari64, - NVPTX::INT_PTX_LDG_GLOBAL_f32ari64, - NVPTX::INT_PTX_LDG_GLOBAL_f64ari64); - break; - case ISD::INTRINSIC_W_CHAIN: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_GLOBAL_i8ari64, - NVPTX::INT_PTX_LDU_GLOBAL_i16ari64, - NVPTX::INT_PTX_LDU_GLOBAL_i32ari64, - NVPTX::INT_PTX_LDU_GLOBAL_i64ari64, - NVPTX::INT_PTX_LDU_GLOBAL_f32ari64, - NVPTX::INT_PTX_LDU_GLOBAL_f64ari64); - break; - case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64); - break; - case NVPTXISD::LDUV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64); - break; - case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt); - break; - case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64, - 
NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt); - break; - } - } else { - SelectADDRri(Op1.getNode(), Op1, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case ISD::LOAD: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8ari, - NVPTX::INT_PTX_LDG_GLOBAL_i16ari, NVPTX::INT_PTX_LDG_GLOBAL_i32ari, - NVPTX::INT_PTX_LDG_GLOBAL_i64ari, NVPTX::INT_PTX_LDG_GLOBAL_f32ari, - NVPTX::INT_PTX_LDG_GLOBAL_f64ari); - break; - case ISD::INTRINSIC_W_CHAIN: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8ari, - NVPTX::INT_PTX_LDU_GLOBAL_i16ari, NVPTX::INT_PTX_LDU_GLOBAL_i32ari, - NVPTX::INT_PTX_LDU_GLOBAL_i64ari, NVPTX::INT_PTX_LDU_GLOBAL_f32ari, - NVPTX::INT_PTX_LDU_GLOBAL_f64ari); - break; - case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32); - break; - case NVPTXISD::LDUV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32); - break; - case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt); - break; - case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32, - 
NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt); - break; - } - } + std::optional Opcode; + switch (N->getOpcode()) { + default: + return false; + case ISD::LOAD: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8, + NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32, + NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32, + NVPTX::INT_PTX_LDG_GLOBAL_f64); + break; + case ISD::INTRINSIC_W_CHAIN: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8, + NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32, + NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32, + NVPTX::INT_PTX_LDU_GLOBAL_f64); + break; + case NVPTXISD::LoadV2: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE, + NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE, + NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE, + NVPTX::INT_PTX_LDG_G_v2f64_ELE); + break; + case NVPTXISD::LDUV2: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE, + NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE, + NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE, + NVPTX::INT_PTX_LDU_G_v2f64_ELE); + break; + case NVPTXISD::LoadV4: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE, + NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE, + std::nullopt, NVPTX::INT_PTX_LDG_G_v4f32_ELE, std::nullopt); + break; + case NVPTXISD::LDUV4: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE, + NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE, + std::nullopt, NVPTX::INT_PTX_LDU_G_v4f32_ELE, std::nullopt); + break; } if (!Opcode) return false; - SDValue Ops[] = {Base, Offset, Chain}; - LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops); + + 
SDLoc DL(N); + SDNode *LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops); // For automatic generation of LDG (through SelectLoad[Vector], not the // intrinsics), we may have an extending load like: @@ -1572,8 +1399,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(ST); - unsigned int PointerSize = - CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace()); SDLoc DL(N); SDValue Chain = ST->getChain(); @@ -1598,38 +1423,28 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { // Create the machine instruction DAG SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal(); - SDValue BasePtr = ST->getBasePtr(); + SDValue Offset, Base; - std::optional Opcode; - MVT::SimpleValueType SourceVT = + SelectADDR(ST->getBasePtr(), Base, Offset); + + SDValue Ops[] = {Value, + getI32Imm(Ordering, DL), + getI32Imm(Scope, DL), + getI32Imm(CodeAddrSpace, DL), + getI32Imm(VecType, DL), + getI32Imm(ToType, DL), + getI32Imm(ToTypeWidth, DL), + Base, + Offset, + Chain}; + + const MVT::SimpleValueType SourceVT = Value.getNode()->getSimpleValueType(0).SimpleTy; - - SmallVector Ops( - {Value, getI32Imm(Ordering, DL), getI32Imm(Scope, DL), - getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), - getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)}); - - if (SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) { - Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi, - NVPTX::ST_i32_asi, NVPTX::ST_i64_asi, - NVPTX::ST_f32_asi, NVPTX::ST_f64_asi); - } else { - if (PointerSize == 64) { - SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset); - Opcode = - pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64, - NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, - NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64); - } else { - SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset); - Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari, - 
NVPTX::ST_i32_ari, NVPTX::ST_i64_ari, - NVPTX::ST_f32_ari, NVPTX::ST_f64_ari); - } - } + const std::optional Opcode = + pickOpcodeForVT(SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32, + NVPTX::ST_i64, NVPTX::ST_f32, NVPTX::ST_f64); if (!Opcode) return false; - Ops.append({Base, Offset, Chain}); SDNode *NVPTXST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops); @@ -1644,9 +1459,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { SDValue Op1 = N->getOperand(1); - SDValue Offset, Base; - std::optional Opcode; - SDNode *ST; EVT EltVT = Op1.getValueType(); MemSDNode *MemSD = cast(N); EVT StoreVT = MemSD->getMemoryVT(); @@ -1657,8 +1469,6 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { report_fatal_error("Cannot store to pointer that points to constant " "memory space"); } - unsigned int PointerSize = - CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace()); SDLoc DL(N); SDValue Chain = N->getOperand(0); @@ -1697,72 +1507,35 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { ToTypeWidth = 32; } + SDValue Offset, Base; + SelectADDR(N2, Base, Offset); + Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL), getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), - getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)}); + getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL), Base, Offset, + Chain}); - if (SelectADDRsi(N2.getNode(), N2, Base, Offset)) { - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::StoreV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi, - NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi, - NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi); - break; - case NVPTXISD::StoreV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi, - NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, - std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt); - break; - } - } else { - if (PointerSize == 64) 
{ - SelectADDRri64(N2.getNode(), N2, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::StoreV2: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64, - NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64, - NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64); - break; - case NVPTXISD::StoreV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64, - NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt, - NVPTX::STV_f32_v4_ari_64, std::nullopt); - break; - } - } else { - SelectADDRri(N2.getNode(), N2, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::StoreV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari, - NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari, - NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari); - break; - case NVPTXISD::StoreV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari, - NVPTX::STV_i32_v4_ari, std::nullopt, - NVPTX::STV_f32_v4_ari, std::nullopt); - break; - } - } + std::optional Opcode; + switch (N->getOpcode()) { + default: + return false; + case NVPTXISD::StoreV2: + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2, + NVPTX::STV_i16_v2, NVPTX::STV_i32_v2, NVPTX::STV_i64_v2, + NVPTX::STV_f32_v2, NVPTX::STV_f64_v2); + break; + case NVPTXISD::StoreV4: + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4, + NVPTX::STV_i16_v4, NVPTX::STV_i32_v4, std::nullopt, + NVPTX::STV_f32_v4, std::nullopt); + break; } + if (!Opcode) return false; - Ops.append({Base, Offset, Chain}); - ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops); + SDNode *ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops); MachineMemOperand *MemRef = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(ST), {MemRef}); @@ -2413,27 +2186,28 @@ static inline 
bool isAddLike(const SDValue V) { (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint()); } -// SelectDirectAddr - Match a direct address for DAG. -// A direct address could be a globaladdress or externalsymbol. -bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { +// selectBaseADDR - Match a dag node which will serve as the base address for an +// ADDR operand pair. +static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) { // Return true if TGA or ES. if (N.getOpcode() == ISD::TargetGlobalAddress || - N.getOpcode() == ISD::TargetExternalSymbol) { - Address = N; - return true; - } - if (N.getOpcode() == NVPTXISD::Wrapper) { - Address = N.getOperand(0); - return true; - } + N.getOpcode() == ISD::TargetExternalSymbol) + return N; + + if (N.getOpcode() == NVPTXISD::Wrapper) + return N.getOperand(0); + // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol - if (AddrSpaceCastSDNode *CastN = dyn_cast(N)) { + if (AddrSpaceCastSDNode *CastN = dyn_cast(N)) if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC && CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM && CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam) - return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address); - } - return false; + return selectBaseADDR(CastN->getOperand(0).getOperand(0), DAG); + + if (auto *FIN = dyn_cast(N)) + return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0)); + + return N; } static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) { @@ -2454,37 +2228,17 @@ static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) { MVT::i32); } -// symbol+offset -bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset) { - Offset = accumulateOffset(Addr, SDLoc(OpNode), CurDAG); - return SelectDirectAddr(Addr, Base); -} - -// register+offset -void NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue 
&Offset, - MVT VT) { - - Offset = accumulateOffset(Addr, SDLoc(OpNode), CurDAG); - if (auto *FIN = dyn_cast(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); - return; - } - Base = Addr; -} - -// register+offset -bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset) { - SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32); - return true; -} - -// register+offset -bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset) { - SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); +// Select a pair of operands which represent a valid PTX address, this could be +// one of the following things: +// - [var] - Offset is simply set to 0 +// - [reg] - Offset is simply set to 0 +// - [reg+immOff] +// - [var+immOff] +// Note that immOff must fit into a 32-bit signed integer. +bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base, + SDValue &Offset) { + Offset = accumulateOffset(Addr, SDLoc(Addr), CurDAG); + Base = selectBaseADDR(Addr, CurDAG); return true; } @@ -2513,12 +2267,7 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( default: return true; case InlineAsm::ConstraintCode::m: // memory - if (SelectDirectAddr(Op, Op0)) { - OutOps.push_back(Op0); - OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); - return false; - } - if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) { + if (SelectADDR(Op, Op0, Op1)) { OutOps.push_back(Op0); OutOps.push_back(Op1); return false; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index eb0c6fe982688..42891b8ca8d8d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -106,17 +106,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { return CurDAG->getTargetConstant(Imm, DL, MVT::i32); } - // Match direct address complex pattern. 
- bool SelectDirectAddr(SDValue N, SDValue &Address); - - void SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base, - SDValue &Offset, MVT VT); - bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base, - SDValue &Offset); - bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base, - SDValue &Offset); - bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base, - SDValue &Offset); + bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset); bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 6a0f708021a16..36a0a06bdb8aa 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1917,27 +1917,15 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>; // Data Movement (Load / Store, Move) //----------------------------------- -let WantsRoot = true in { - def ADDRri : ComplexPattern; - def ADDRri64 : ComplexPattern; -} -def ADDRvar : ComplexPattern; +def addr : ComplexPattern; -def MEMri : Operand { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops Int32Regs, i32imm); -} -def MEMri64 : Operand { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops Int64Regs, i64imm); -} - -def imem : Operand { +def ADDR_base : Operand { let PrintMethod = "printOperand"; } -def imemAny : Operand { - let PrintMethod = "printOperand"; +def ADDR : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops ADDR_base, i32imm); } def LdStCode : Operand { @@ -1956,10 +1944,10 @@ def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; // Load a memory address into a u32 or u64 register. 
-def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a), +def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a), "mov.u32 \t$dst, $a;", [(set i32:$dst, (Wrapper tglobaladdr:$a))]>; -def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), +def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a), "mov.u64 \t$dst, $a;", [(set i64:$dst, (Wrapper tglobaladdr:$a))]>; @@ -2021,12 +2009,17 @@ def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>; //---- Copy Frame Index ---- -def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), - "add.u32 \t$dst, ${addr:add};", - [(set i32:$dst, ADDRri:$addr)]>; -def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), - "add.u64 \t$dst, ${addr:add};", - [(set i64:$dst, ADDRri64:$addr)]>; +def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr), + "add.u32 \t$dst, ${addr:add};", []>; +def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR:$addr), + "add.u64 \t$dst, ${addr:add};", []>; + +def to_tframeindex : SDNodeXFormgetTargetFrameIndex(N->getIndex(), N->getValueType(0)); +}]>; + +def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>; +def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>; //----------------------------------- // Comparison and Selection @@ -2660,7 +2653,7 @@ def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ", def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", [(LastCallArg (i32 1), (i32 imm:$a))]>; -def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ", +def CallVoidInst : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ", [(CallVoid (Wrapper tglobaladdr:$addr))]>; def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ", [(CallVoid i32:$addr)]>; @@ -2753,109 +2746,56 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in { // // Load / Store Handling // 
-multiclass LD { - def _ari : NVPTXInst< +class LD + : NVPTXInst< (outs regclass:$dst), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr$offset];", []>; - def _ari_64 : NVPTXInst< - (outs regclass:$dst), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr$offset];", []>; - def _asi : NVPTXInst< - (outs regclass:$dst), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), + i32imm:$fromWidth, ADDR:$addr), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr$offset];", []>; -} + "\t$dst, [$addr];", []>; let mayLoad=1, hasSideEffects=0 in { - defm LD_i8 : LD; - defm LD_i16 : LD; - defm LD_i32 : LD; - defm LD_i64 : LD; - defm LD_f32 : LD; - defm LD_f64 : LD; + def LD_i8 : LD; + def LD_i16 : LD; + def LD_i32 : LD; + def LD_i64 : LD; + def LD_f32 : LD; + def LD_f64 : LD; } -multiclass ST { - def _ari : NVPTXInst< +class ST + : NVPTXInst< (outs), (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, - Offseti32imm:$offset), + LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, ADDR:$addr), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr$offset], $src;", []>; - def _ari_64 : NVPTXInst< - (outs), - (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, - Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " 
\t[$addr$offset], $src;", []>; - def _asi : NVPTXInst< - (outs), - (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr, - Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr$offset], $src;", []>; -} + " \t[$addr], $src;", []>; let mayStore=1, hasSideEffects=0 in { - defm ST_i8 : ST; - defm ST_i16 : ST; - defm ST_i32 : ST; - defm ST_i64 : ST; - defm ST_f32 : ST; - defm ST_f64 : ST; + def ST_i8 : ST; + def ST_i16 : ST; + def ST_i32 : ST; + def ST_i64 : ST; + def ST_f32 : ST; + def ST_f64 : ST; } // The following is used only in and after vector elementizations. Vector // elementization happens at the machine instruction level, so the following // instructions never appear in the DAG. multiclass LD_VEC { - def _v2_ari : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr$offset];", []>; - def _v2_ari_64 : NVPTXInst< + def _v2 : NVPTXInst< (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr$offset];", []>; - def _v2_asi : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr$offset];", []>; - def _v4_ari : NVPTXInst< + "\t{{$dst1, $dst2}}, [$addr];", 
[]>; + def _v4 : NVPTXInst< (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>; - def _v4_ari_64 : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>; - def _v4_asi : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>; + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; } let mayLoad=1, hasSideEffects=0 in { defm LDV_i8 : LD_VEC; @@ -2867,48 +2807,20 @@ let mayLoad=1, hasSideEffects=0 in { } multiclass ST_VEC { - def _v2_ari : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope, - LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, - Int32Regs:$addr, Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr$offset], {{$src1, $src2}};", []>; - def _v2_ari_64 : NVPTXInst< + def _v2 : NVPTXInst< (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, - Int64Regs:$addr, Offseti32imm:$offset), + ADDR:$addr), 
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr$offset], {{$src1, $src2}};", []>; - def _v2_asi : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope, - LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, - imem:$addr, Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr$offset], {{$src1, $src2}};", []>; - def _v4_ari : NVPTXInst< + "\t[$addr], {{$src1, $src2}};", []>; + def _v4 : NVPTXInst< (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>; - def _v4_ari_64 : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>; - def _v4_asi : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}" - "$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>; + "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; } let mayStore=1, hasSideEffects=0 in { diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index bcb3f05f3f8a8..7d7e69adafcd0 100644 --- 
a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2728,65 +2728,46 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">; // Scalar -multiclass LDU_G { - def asi: NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset), - "ldu.global." # TyStr # " \t$result, [$src$offset];", - []>, Requires<[hasLDU]>; - def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), +class LDU_G + : NVPTXInst<(outs regclass:$result), (ins ADDR:$src), "ldu.global." # TyStr # " \t$result, [$src];", []>, Requires<[hasLDU]>; - def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), - "ldu.global." # TyStr # " \t$result, [$src];", - []>, Requires<[hasLDU]>; -} -defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>; -defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>; -defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>; -defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>; -defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>; -defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>; +def INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>; +def INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>; +def INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>; +def INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>; +def INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>; +def INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>; // vector // Elementized vector ldu -multiclass VLDU_G_ELE_V2 { - def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins MEMri:$src), - "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>; - def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins MEMri64:$src), +class VLDU_G_ELE_V2 + : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins ADDR:$src), "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>; - def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins imemAny:$src, Offseti32imm:$offset), - "ldu.global.v2." 
# TyStr # " \t{{$dst1, $dst2}}, [$src$offset];", []>; -} -multiclass VLDU_G_ELE_V4 { - def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri:$src), - "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; - def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri64:$src), + +class VLDU_G_ELE_V4 + : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins ADDR:$src), "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; - def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset), - "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>; -} -defm INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>; -defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>; -defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>; -defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>; -defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>; -defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>; -defm INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>; -defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>; -defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>; -defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>; -defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>; -defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>; +def INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>; +def INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>; +def INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>; +def INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>; +def INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>; +def INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>; 
+ +def INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>; +def INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>; +def INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>; +def INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>; +def INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>; +def INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>; //----------------------------------- @@ -2797,64 +2778,44 @@ defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>; // non-coherent texture cache, and therefore the values read must be read-only // during the lifetime of the kernel. -multiclass LDG_G { - def asi: NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset), - "ld.global.nc." # TyStr # " \t$result, [$src$offset];", - []>, Requires<[hasLDG]>; - def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), - "ld.global.nc." # TyStr # " \t$result, [$src];", - []>, Requires<[hasLDG]>; - def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), +class LDG_G + : NVPTXInst<(outs regclass:$result), (ins ADDR:$src), "ld.global.nc." 
# TyStr # " \t$result, [$src];", []>, Requires<[hasLDG]>; -} -defm INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>; -defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>; -defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>; -defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>; -defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>; -defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>; +def INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>; +def INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>; +def INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>; +def INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>; +def INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>; +def INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>; // vector // Elementized vector ldg -multiclass VLDG_G_ELE_V2 { - def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins MEMri:$src), - "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>; - def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins MEMri64:$src), - "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>; - def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins imemAny:$src, Offseti32imm:$offset), - "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];", []>; -} - -multiclass VLDG_G_ELE_V4 { - def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri:$src), - "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; - def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri64:$src), - "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; - def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset), - "ld.global.nc.v4." 
# TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>; -} +class VLDG_G_ELE_V2 : + NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins ADDR:$src), + "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>; + + +class VLDG_G_ELE_V4 : + NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), + (ins ADDR:$src), + "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads. -defm INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>; -defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>; -defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>; -defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>; -defm INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>; -defm INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>; +def INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>; +def INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>; +def INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>; +def INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>; +def INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>; +def INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>; -defm INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>; -defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>; -defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>; -defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>; +def INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>; +def INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>; +def INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>; +def INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>; multiclass NG_TO_G { @@ -2929,17 +2890,17 @@ def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), // @TODO: Are these actually needed, or will we always just 
see symbols // copied to registers first? -/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s), +/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins ADDR_base:$s), "mov.u32 \t$r, $s;", [(set Int32Regs:$r, (int_nvvm_move_ptr texternalsym:$s))]>; -def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), +def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins ADDR_base:$s), "mov.u64 \t$r, $s;", [(set Int64Regs:$r, (int_nvvm_move_ptr texternalsym:$s))]>;*/ def texsurf_handles - : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), + : NVPTXInst<(outs Int64Regs:$result), (ins ADDR_base:$src), "mov.u64 \t$result, $src;", []>; //----------------------------------- @@ -7223,20 +7184,16 @@ class WMMA_REGINFO class BuildPatternI { // Build a dag pattern that matches the intrinsic call. dag ret = !foreach(tmp, Ins, - !subst(imem, ADDRvar, - !subst(MEMri64, ADDRri64, - !subst(MEMri, ADDRri, - !subst(ins, Intr, tmp))))); + !subst(ADDR, addr, + !subst(ins, Intr, tmp))); } // Same as above, but uses PatFrag instead of an Intrinsic. class BuildPatternPF { // Build a dag pattern that matches the intrinsic call. dag ret = !foreach(tmp, Ins, - !subst(imem, ADDRvar, - !subst(MEMri64, ADDRri64, - !subst(MEMri, ADDRri, - !subst(ins, Intr, tmp))))); + !subst(ADDR, addr, + !subst(ins, Intr, tmp))); } // Common WMMA-related fields used for building patterns for all MMA instructions. @@ -7253,10 +7210,9 @@ class WMMA_INSTR _Args> // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] // -class WMMA_LOAD +class WMMA_LOAD : WMMA_INSTR.record, - [!con((ins SrcOp:$src), + [!con((ins ADDR:$src), !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, Requires { // Load/store intrinsics are overloaded on pointer's address space. 
@@ -7293,9 +7249,9 @@ class WMMA_LOAD + bit WithStride> : WMMA_INSTR.record, - [!con((ins DstOp:$dst), + [!con((ins ADDR:$dst), Frag.Ins, !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, Requires { @@ -7334,14 +7290,12 @@ defset list MMA_LDSTs = { foreach layout = ["row", "col"] in { foreach stride = [false, true] in { foreach space = [".global", ".shared", ""] in { - foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { - foreach frag = NVVM_MMA_OPS.all_ld_ops in - if NVVM_WMMA_LDST_SUPPORTED.ret then - def : WMMA_LOAD, layout, space, stride, addr>; - foreach frag = NVVM_MMA_OPS.all_st_ops in - if NVVM_WMMA_LDST_SUPPORTED.ret then - def : WMMA_STORE_D, layout, space, stride, addr>; - } // addr + foreach frag = NVVM_MMA_OPS.all_ld_ops in + if NVVM_WMMA_LDST_SUPPORTED.ret then + def : WMMA_LOAD, layout, space, stride>; + foreach frag = NVVM_MMA_OPS.all_st_ops in + if NVVM_WMMA_LDST_SUPPORTED.ret then + def : WMMA_STORE_D, layout, space, stride>; } // space } // stride } // layout @@ -7468,9 +7422,8 @@ defset list MMAs = { // // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16 // -class LDMATRIX - : WMMA_INSTR.record, [(ins SrcOp:$src)]>, +class LDMATRIX + : WMMA_INSTR.record, [(ins ADDR:$src)]>, Requires { // Build PatFrag that only matches particular address space. 
PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src), @@ -7494,12 +7447,9 @@ class LDMATRIX LDMATRIXs = { foreach transposed = [false, true] in { foreach space = [".shared", ""] in { - foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { - foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in - if NVVM_LDMATRIX_SUPPORTED.ret then - def : LDMATRIX, transposed, space, - addr>; - } // addr + foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in + if NVVM_LDMATRIX_SUPPORTED.ret then + def : LDMATRIX, transposed, space>; } // space } // transposed } // defset diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index 4971d31691c54..46e4a905aa09a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -1800,7 +1800,7 @@ bool NVPTXReplaceImageHandles::replaceImageHandle(MachineOperand &Op, MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg()); switch (TexHandleDef.getOpcode()) { - case NVPTX::LD_i64_asi: { + case NVPTX::LD_i64: { // The handle is a parameter value being loaded, replace with the // parameter symbol const auto &TM = static_cast(MF.getTarget()); diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp index b9e03b6cb6d21..addac6f41a715 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -217,10 +217,11 @@ std::optional llvm::isPartOfGOTToPCRelPair(const MCInst &Inst, return (Inst.getOpcode() == PPC::PLDpc); } -MCELFStreamer *llvm::createPPCELFStreamer( - MCContext &Context, std::unique_ptr MAB, - std::unique_ptr OW, - std::unique_ptr Emitter) { - return new PPCELFStreamer(Context, std::move(MAB), std::move(OW), +MCStreamer * +llvm::createPPCELFStreamer(const Triple &T, MCContext &C, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter) { + 
return new PPCELFStreamer(C, std::move(MAB), std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h index 10204b184a49f..2b803950073f9 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h @@ -50,10 +50,10 @@ class PPCELFStreamer : public MCELFStreamer { std::optional isPartOfGOTToPCRelPair(const MCInst &Inst, const MCSubtargetInfo &STI); -MCELFStreamer *createPPCELFStreamer(MCContext &Context, - std::unique_ptr MAB, - std::unique_ptr OW, - std::unique_ptr Emitter); +MCStreamer *createPPCELFStreamer(const Triple &, MCContext &, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter); } // end namespace llvm #endif // LLVM_LIB_TARGET_PPC_MCELFSTREAMER_PPCELFSTREAMER_H diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 0a0facb10e48a..5dde48fba5605 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -199,24 +199,6 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCStreamer * -createPPCELFStreamer(const Triple &T, MCContext &Context, - std::unique_ptr &&MAB, - std::unique_ptr &&OW, - std::unique_ptr &&Emitter) { - return createPPCELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter)); -} - -static MCStreamer * -createPPCXCOFFStreamer(const Triple &T, MCContext &Context, - std::unique_ptr &&MAB, - std::unique_ptr &&OW, - std::unique_ptr &&Emitter) { - return createPPCXCOFFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter)); -} - namespace { class PPCTargetAsmStreamer : public PPCTargetStreamer { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp index 
72e3cff615662..2a6da4c097fc1 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp @@ -64,11 +64,11 @@ void PPCXCOFFStreamer::emitInstruction(const MCInst &Inst, emitPrefixedInstruction(Inst, STI); } -MCXCOFFStreamer * -llvm::createPPCXCOFFStreamer(MCContext &Context, - std::unique_ptr MAB, - std::unique_ptr OW, - std::unique_ptr Emitter) { - return new PPCXCOFFStreamer(Context, std::move(MAB), std::move(OW), +MCStreamer * +llvm::createPPCXCOFFStreamer(const Triple &, MCContext &C, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter) { + return new PPCXCOFFStreamer(C, std::move(MAB), std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h index 5fa35127b70b4..1e3671c817eb8 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h @@ -29,10 +29,10 @@ class PPCXCOFFStreamer : public MCXCOFFStreamer { void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI); }; -MCXCOFFStreamer *createPPCXCOFFStreamer(MCContext &Context, - std::unique_ptr MAB, - std::unique_ptr OW, - std::unique_ptr Emitter); +MCStreamer *createPPCXCOFFStreamer(const Triple &, MCContext &, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter); } // end namespace llvm #endif // LLVM_LIB_TARGET_PPC_MCXCOFFSTREAMER_PPCXCOFFSTREAMER_H diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d6c8e8d506799..91df5f467e59c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1355,10 +1355,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STORE, MVT::v256i1, Custom); } if (Subtarget.hasMMA()) { - if (Subtarget.isISAFuture()) + if 
(Subtarget.isISAFuture()) { addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass); - else + addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass); + setOperationAction(ISD::LOAD, MVT::v1024i1, Custom); + setOperationAction(ISD::STORE, MVT::v1024i1, Custom); + } else { addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass); + } setOperationAction(ISD::LOAD, MVT::v512i1, Custom); setOperationAction(ISD::STORE, MVT::v512i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom); @@ -11758,6 +11762,64 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, return Op; } +SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + LoadSDNode *LN = cast(Op.getNode()); + SDValue LoadChain = LN->getChain(); + SDValue BasePtr = LN->getBasePtr(); + EVT VT = Op.getValueType(); + + // Type v1024i1 is used for Dense Math dmr registers. + assert(VT == MVT::v1024i1 && "Unsupported type."); + assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) && + "Dense Math support required."); + assert(Subtarget.pairedVectorMemops() && "Vector pair support required."); + + SmallVector Loads; + SmallVector LoadChains; + SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32); + SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr}; + MachineMemOperand *MMO = LN->getMemOperand(); + unsigned NumVecs = VT.getSizeInBits() / 256; + for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { + MachineMemOperand *NewMMO = + DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32); + if (Idx > 0) { + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(32, dl, BasePtr.getValueType())); + LoadOps[2] = BasePtr; + } + SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, + DAG.getVTList(MVT::v256i1, MVT::Other), + LoadOps, MVT::v256i1, NewMMO); + LoadChains.push_back(Ld.getValue(1)); + Loads.push_back(Ld); + } + + if (Subtarget.isLittleEndian()) { + std::reverse(Loads.begin(), Loads.end()); 
+ std::reverse(LoadChains.begin(), LoadChains.end()); + } + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1, Loads[0], + Loads[1]), + 0); + SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32); + SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1, + Loads[2], Loads[3]), + 0); + SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32); + SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32); + const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub}; + SDValue Value = + SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0); + + SDValue RetOps[] = {Value, TF}; + return DAG.getMergeValues(RetOps, dl); +} + SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -11766,6 +11828,9 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, SDValue BasePtr = LN->getBasePtr(); EVT VT = Op.getValueType(); + if (VT == MVT::v1024i1) + return LowerDMFVectorLoad(Op, DAG); + if (VT != MVT::v256i1 && VT != MVT::v512i1) return Op; @@ -11803,6 +11868,69 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, return DAG.getMergeValues(RetOps, dl); } +SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc dl(Op); + StoreSDNode *SN = cast(Op.getNode()); + SDValue StoreChain = SN->getChain(); + SDValue BasePtr = SN->getBasePtr(); + SmallVector Values; + SmallVector Stores; + EVT VT = SN->getValue().getValueType(); + + // Type v1024i1 is used for Dense Math dmr registers. 
+ assert(VT == MVT::v1024i1 && "Unsupported type."); + assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) && + "Dense Math support required."); + assert(Subtarget.pairedVectorMemops() && "Vector pair support required."); + + SDValue Lo( + DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, + Op.getOperand(1), + DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), + 0); + SDValue Hi( + DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, + Op.getOperand(1), + DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), + 0); + EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1}; + MachineSDNode *ExtNode = + DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo); + Values.push_back(SDValue(ExtNode, 0)); + Values.push_back(SDValue(ExtNode, 1)); + ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi); + Values.push_back(SDValue(ExtNode, 0)); + Values.push_back(SDValue(ExtNode, 1)); + + if (Subtarget.isLittleEndian()) + std::reverse(Values.begin(), Values.end()); + + SDVTList Tys = DAG.getVTList(MVT::Other); + SmallVector Ops{ + StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32), + Values[0], BasePtr}; + MachineMemOperand *MMO = SN->getMemOperand(); + unsigned NumVecs = VT.getSizeInBits() / 256; + for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { + MachineMemOperand *NewMMO = + DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32); + if (Idx > 0) { + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(32, dl, BasePtr.getValueType())); + Ops[3] = BasePtr; + } + Ops[2] = Values[Idx]; + SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, + MVT::v256i1, NewMMO); + Stores.push_back(St); + } + + SDValue TF = DAG.getTokenFactor(dl, Stores); + return TF; +} + SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -11813,6 +11941,9 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SDValue Value2 = 
SN->getValue(); EVT StoreVT = Value.getValueType(); + if (StoreVT == MVT::v1024i1) + return LowerDMFVectorStore(Op, DAG); + if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1) return Op; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 514329bbe92d7..1f22aa16a89be 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1344,6 +1344,8 @@ namespace llvm { SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 9b526066fe75b..f12400490832b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -5427,8 +5427,8 @@ void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg, --Iter; MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter); for (unsigned i = 1; i < MI->getNumOperands(); i++) { - if (PromoteRegs.find(i) != PromoteRegs.end()) - MIBuilder.addReg(PromoteRegs[i], RegState::Kill); + if (auto It = PromoteRegs.find(i); It != PromoteRegs.end()) + MIBuilder.addReg(It->second, RegState::Kill); else Iter->addOperand(MI->getOperand(i)); } diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 36e2fa0262f9d..85d53f0c5045c 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1684,14 +1684,9 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_InvalidStackAdj: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); - 
StringRef SpecName = "Zc"; - if (getSTI().hasFeature(RISCV::FeatureVendorXqccmp)) - SpecName = "Xqccmp"; - - return Error(ErrorLoc, - Twine("stack adjustment is invalid for this instruction") + - " and register list; refer to " + SpecName + - " spec for a detailed range of stack adjustment"); + return Error( + ErrorLoc, + "stack adjustment is invalid for this instruction and register list"); } } @@ -2771,12 +2766,25 @@ ParseStatus RISCVAsmParser::parseZcmpStackAdj(OperandVector &Operands, SMLoc S = getLoc(); int64_t StackAdjustment = getLexer().getTok().getIntVal(); - unsigned Spimm = 0; unsigned RlistVal = static_cast(Operands[1].get())->Rlist.Val; - if (Negative != ExpectNegative || - !RISCVZC::getSpimm(RlistVal, Spimm, StackAdjustment, isRV64())) - return ParseStatus::NoMatch; + assert(RlistVal != RISCVZC::INVALID_RLIST); + unsigned StackAdjBase = RISCVZC::getStackAdjBase(RlistVal, isRV64()); + if (Negative != ExpectNegative || StackAdjustment % 16 != 0 || + StackAdjustment < StackAdjBase || (StackAdjustment - StackAdjBase) > 48) { + int64_t Lower = StackAdjBase; + int64_t Upper = StackAdjBase + 48; + if (ExpectNegative) { + Lower = -Lower; + Upper = -Upper; + std::swap(Lower, Upper); + } + return generateImmOutOfRangeError(S, Lower, Upper, + "stack adjustment for register list must " + "be a multiple of 16 bytes in the range"); + } + + unsigned Spimm = (StackAdjustment - StackAdjBase) / 16; Operands.push_back(RISCVOperand::createSpimm(Spimm << 4, S)); getLexer().Lex(); return ParseStatus::Success; diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index e99df34908d6e..61deaa827a6df 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -657,16 +657,6 @@ DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size, uint32_t Insn = support::endian::read32le(Bytes.data()); - 
TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZdinx) && - !STI.hasFeature(RISCV::Feature64Bit), - DecoderTableRV32Zdinx32, - "RV32Zdinx (Double in Integer and rv32)"); - TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZacas) && - !STI.hasFeature(RISCV::Feature64Bit), - DecoderTableRV32Zacas32, - "RV32Zacas (Compare-And-Swap and rv32)"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32, - "RVZfinx (Float in Integer)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps, DecoderTableXVentana32, "XVentanaCondOps"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBa, DecoderTableXTHeadBa32, @@ -721,6 +711,11 @@ DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size, TRY_TO_DECODE_FEATURE_ANY(XRivosFeatureGroup, DecoderTableXRivos32, "Rivos"); TRY_TO_DECODE(true, DecoderTable32, "RISCV32"); + TRY_TO_DECODE(true, DecoderTableRV32GPRPair32, + "RV32GPRPair (rv32 and GPR pairs)"); + TRY_TO_DECODE(true, DecoderTableZfinx32, "Zfinx (Float in Integer)"); + TRY_TO_DECODE(true, DecoderTableZdinxRV32GPRPair32, + "ZdinxRV32GPRPair (rv32 and Double in Integer)"); return MCDisassembler::Fail; } @@ -736,15 +731,6 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size, Size = 2; uint32_t Insn = support::endian::read16le(Bytes.data()); - TRY_TO_DECODE_AND_ADD_SP(!STI.hasFeature(RISCV::Feature64Bit), - DecoderTableRISCV32Only_16, - "RISCV32Only_16 (16-bit Instruction)"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZicfiss, DecoderTableZicfiss16, - "RVZicfiss (Shadow Stack)"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZcmt, DecoderTableRVZcmt16, - "Zcmt (16-bit Table Jump Instructions)"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZcmp, DecoderTableRVZcmp16, - "Zcmp (16-bit Push/Pop & Double Move Instructions)"); TRY_TO_DECODE_FEATURE_ANY(XqciFeatureGroup, DecoderTableXqci16, "Qualcomm uC 16bit"); @@ -753,8 +739,16 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size, "Xqccmp 
(Qualcomm 16-bit Push/Pop & Double Move Instructions)"); TRY_TO_DECODE_AND_ADD_SP(STI.hasFeature(RISCV::FeatureVendorXwchc), DecoderTableXwchc16, "WCH QingKe XW"); + + // DecoderTableZicfiss16 must be checked before DecoderTable16. + TRY_TO_DECODE(true, DecoderTableZicfiss16, "RVZicfiss (Shadow Stack)"); TRY_TO_DECODE_AND_ADD_SP(true, DecoderTable16, "RISCV_C (16-bit Instruction)"); + TRY_TO_DECODE_AND_ADD_SP(true, DecoderTableRISCV32Only_16, + "RISCV32Only_16 (16-bit Instruction)"); + // Zc* instructions incompatible with Zcf or Zcd. + TRY_TO_DECODE(true, DecoderTableZcOverlap16, + "ZcOverlap (16-bit Instructions overlapping with Zcf/Zcd)"); return MCDisassembler::Fail; } diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp index 0881de90700ab..289f9aa51195d 100644 --- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp +++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp @@ -205,7 +205,7 @@ getEEWAndEMUL(unsigned Opcode, RISCVVType::VLMUL LMUL, uint8_t SEW) { return std::make_pair(EEW, *EMUL); } -bool opcodeHasEEWAndEMULInfo(unsigned short Opcode) { +static bool opcodeHasEEWAndEMULInfo(unsigned short Opcode) { return Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V || Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V || Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V || diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 3b2c0cba66d12..1829291cd0348 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -33,6 +33,24 @@ namespace RISCVInsnOpcode { #include "RISCVGenSearchableTables.inc" } // namespace RISCVInsnOpcode +namespace RISCVVInversePseudosTable { +using namespace RISCV; +#define GET_RISCVVInversePseudosTable_IMPL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCVVInversePseudosTable + +namespace RISCV { +#define 
GET_RISCVVSSEGTable_IMPL +#define GET_RISCVVLSEGTable_IMPL +#define GET_RISCVVLXSEGTable_IMPL +#define GET_RISCVVSXSEGTable_IMPL +#define GET_RISCVVLETable_IMPL +#define GET_RISCVVSETable_IMPL +#define GET_RISCVVLXTable_IMPL +#define GET_RISCVVSXTable_IMPL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCV + namespace RISCVABI { ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits, StringRef ABIName) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 80ff18d914dca..82b562bec9bf6 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -484,8 +484,8 @@ struct SysReg { namespace RISCVInsnOpcode { struct RISCVOpcode { - const char *Name; - unsigned Value; + char Name[10]; + uint8_t Value; }; #define GET_RISCVOpcodesList_DECL @@ -637,23 +637,98 @@ inline static unsigned getStackAdjBase(unsigned RlistVal, bool IsRV64) { llvm_unreachable("Unexpected RlistVal"); } -inline static bool getSpimm(unsigned RlistVal, unsigned &SpimmVal, - int64_t StackAdjustment, bool IsRV64) { - if (RlistVal == RLISTENCODE::INVALID_RLIST) - return false; - unsigned StackAdjBase = getStackAdjBase(RlistVal, IsRV64); - StackAdjustment -= StackAdjBase; - if (StackAdjustment % 16 != 0) - return false; - SpimmVal = StackAdjustment / 16; - if (SpimmVal > 3) - return false; - return true; -} - void printRlist(unsigned SlistEncode, raw_ostream &OS); } // namespace RISCVZC +namespace RISCVVInversePseudosTable { +struct PseudoInfo { + uint16_t Pseudo; + uint16_t BaseInstr; + uint8_t VLMul; + uint8_t SEW; +}; + +#define GET_RISCVVInversePseudosTable_DECL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCVVInversePseudosTable + +namespace RISCV { +struct VLSEGPseudo { + uint16_t NF : 4; + uint16_t Masked : 1; + uint16_t Strided : 1; + uint16_t FF : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t Pseudo; +}; + 
+struct VLXSEGPseudo { + uint16_t NF : 4; + uint16_t Masked : 1; + uint16_t Ordered : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t IndexLMUL : 3; + uint16_t Pseudo; +}; + +struct VSSEGPseudo { + uint16_t NF : 4; + uint16_t Masked : 1; + uint16_t Strided : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t Pseudo; +}; + +struct VSXSEGPseudo { + uint16_t NF : 4; + uint16_t Masked : 1; + uint16_t Ordered : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t IndexLMUL : 3; + uint16_t Pseudo; +}; + +struct VLEPseudo { + uint16_t Masked : 1; + uint16_t Strided : 1; + uint16_t FF : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t Pseudo; +}; + +struct VSEPseudo { + uint16_t Masked : 1; + uint16_t Strided : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t Pseudo; +}; + +struct VLX_VSXPseudo { + uint16_t Masked : 1; + uint16_t Ordered : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t IndexLMUL : 3; + uint16_t Pseudo; +}; + +#define GET_RISCVVSSEGTable_DECL +#define GET_RISCVVLSEGTable_DECL +#define GET_RISCVVLXSEGTable_DECL +#define GET_RISCVVSXSEGTable_DECL +#define GET_RISCVVLETable_DECL +#define GET_RISCVVSETable_DECL +#define GET_RISCVVLXTable_DECL +#define GET_RISCVVSXTable_DECL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCV + } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index 868614cbdad6d..5f1d7b03f3218 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -41,15 +41,6 @@ #define GET_SUBTARGETINFO_MC_DESC #include "RISCVGenSubtargetInfo.inc" -namespace llvm::RISCVVInversePseudosTable { - -using namespace RISCV; - -#define GET_RISCVVInversePseudosTable_IMPL -#include "RISCVGenSearchableTables.inc" - -} // namespace llvm::RISCVVInversePseudosTable - using namespace llvm; static MCInstrInfo 
*createRISCVMCInstrInfo() { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h index 6cc22af601fdb..bdee7ed4f011e 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h @@ -36,20 +36,6 @@ MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCSubtargetInfo &STI, std::unique_ptr createRISCVELFObjectWriter(uint8_t OSABI, bool Is64Bit); - -namespace RISCVVInversePseudosTable { - -struct PseudoInfo { - uint16_t Pseudo; - uint16_t BaseInstr; - uint8_t VLMul; - uint8_t SEW; -}; - -#define GET_RISCVVInversePseudosTable_DECL -#include "RISCVGenSearchableTables.inc" - -} // namespace RISCVVInversePseudosTable } // namespace llvm // Defines symbolic names for RISC-V registers. diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 7ea4bd94c0065..f3cce950ed7b5 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -34,18 +34,6 @@ static cl::opt UsePseudoMovImm( "constant materialization"), cl::init(false)); -namespace llvm::RISCV { -#define GET_RISCVVSSEGTable_IMPL -#define GET_RISCVVLSEGTable_IMPL -#define GET_RISCVVLXSEGTable_IMPL -#define GET_RISCVVSXSEGTable_IMPL -#define GET_RISCVVLETable_IMPL -#define GET_RISCVVSETable_IMPL -#define GET_RISCVVLXTable_IMPL -#define GET_RISCVVSXTable_IMPL -#include "RISCVGenSearchableTables.inc" -} // namespace llvm::RISCV - void RISCVDAGToDAGISel::PreprocessISelDAG() { SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index bb786e4b2bb40..5048a80fdd18f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -204,83 +204,6 @@ class RISCVDAGToDAGISelLegacy : public SelectionDAGISelLegacy { CodeGenOptLevel OptLevel); }; 
-namespace RISCV { -struct VLSEGPseudo { - uint16_t NF : 4; - uint16_t Masked : 1; - uint16_t Strided : 1; - uint16_t FF : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t Pseudo; -}; - -struct VLXSEGPseudo { - uint16_t NF : 4; - uint16_t Masked : 1; - uint16_t Ordered : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t IndexLMUL : 3; - uint16_t Pseudo; -}; - -struct VSSEGPseudo { - uint16_t NF : 4; - uint16_t Masked : 1; - uint16_t Strided : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t Pseudo; -}; - -struct VSXSEGPseudo { - uint16_t NF : 4; - uint16_t Masked : 1; - uint16_t Ordered : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t IndexLMUL : 3; - uint16_t Pseudo; -}; - -struct VLEPseudo { - uint16_t Masked : 1; - uint16_t Strided : 1; - uint16_t FF : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t Pseudo; -}; - -struct VSEPseudo { - uint16_t Masked :1; - uint16_t Strided : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t Pseudo; -}; - -struct VLX_VSXPseudo { - uint16_t Masked : 1; - uint16_t Ordered : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t IndexLMUL : 3; - uint16_t Pseudo; -}; - -#define GET_RISCVVSSEGTable_DECL -#define GET_RISCVVLSEGTable_DECL -#define GET_RISCVVLXSEGTable_DECL -#define GET_RISCVVSXSEGTable_DECL -#define GET_RISCVVLETable_DECL -#define GET_RISCVVSETable_DECL -#define GET_RISCVVLXTable_DECL -#define GET_RISCVVSXTable_DECL -#include "RISCVGenSearchableTables.inc" -} // namespace RISCV - } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6076fe56416ad..4e6b3a224b79b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -959,13 +959,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // TODO: support more ops. 
static const unsigned ZvfhminZvfbfminPromoteOps[] = { - ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, - ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, - ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND, - ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS, - ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD, - ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, - ISD::STRICT_FMA}; + ISD::FMINNUM, + ISD::FMAXNUM, + ISD::FADD, + ISD::FSUB, + ISD::FMUL, + ISD::FMA, + ISD::FDIV, + ISD::FSQRT, + ISD::FCEIL, + ISD::FTRUNC, + ISD::FFLOOR, + ISD::FROUND, + ISD::FROUNDEVEN, + ISD::FRINT, + ISD::FNEARBYINT, + ISD::IS_FPCLASS, + ISD::SETCC, + ISD::FMAXIMUM, + ISD::FMINIMUM, + ISD::STRICT_FADD, + ISD::STRICT_FSUB, + ISD::STRICT_FMUL, + ISD::STRICT_FDIV, + ISD::STRICT_FSQRT, + ISD::STRICT_FMA, + ISD::VECREDUCE_FMIN, + ISD::VECREDUCE_FMAX, + ISD::VECREDUCE_FMINIMUM, + ISD::VECREDUCE_FMAXIMUM}; // TODO: support more vp ops. static const unsigned ZvfhminZvfbfminPromoteVPOps[] = { @@ -4562,32 +4584,9 @@ static bool isInterleaveShuffle(ArrayRef Mask, MVT VT, int &EvenSrc, /// Is this mask representing a masked combination of two slides? 
static bool isMaskedSlidePair(ArrayRef Mask, - std::pair SrcInfo[2]) { - int NumElts = Mask.size(); - int SignalValue = NumElts * 2; - SrcInfo[0] = {-1, SignalValue}; - SrcInfo[1] = {-1, SignalValue}; - for (unsigned i = 0; i != Mask.size(); ++i) { - int M = Mask[i]; - if (M < 0) - continue; - int Src = M >= (int)NumElts; - int Diff = (int)i - (M % NumElts); - bool Match = false; - for (int j = 0; j < 2; j++) { - if (SrcInfo[j].first == -1) { - assert(SrcInfo[j].second == SignalValue); - SrcInfo[j].first = Src; - SrcInfo[j].second = Diff; - } - if (SrcInfo[j].first == Src && SrcInfo[j].second == Diff) { - Match = true; - break; - } - } - if (!Match) - return false; - } + std::array, 2> &SrcInfo) { + if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo)) + return false; // Avoid matching vselect idioms if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0) @@ -4603,7 +4602,8 @@ static bool isMaskedSlidePair(ArrayRef Mask, // Exactly matches the semantics of a previously existing custom matcher // to allow migration to new matcher without changing output. -static bool isElementRotate(std::pair SrcInfo[2], unsigned NumElts) { +static bool isElementRotate(std::array, 2> &SrcInfo, + unsigned NumElts) { if (SrcInfo[1].first == -1) return true; return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 && @@ -5604,10 +5604,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, // without masking. Avoid matching bit rotates (which are not also element // rotates) as slide pairs. This is a performance heuristic, not a // functional check. 
- std::pair SrcInfo[2]; + std::array, 2> SrcInfo; unsigned RotateAmt; MVT RotateVT; - if (isMaskedSlidePair(Mask, SrcInfo) && + if (::isMaskedSlidePair(Mask, SrcInfo) && (isElementRotate(SrcInfo, NumElts) || !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) { SDValue Sources[2]; @@ -5964,10 +5964,11 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { if (SVT.getScalarType() == MVT::i1) return false; - std::pair SrcInfo[2]; + std::array, 2> SrcInfo; int Dummy1, Dummy2; return ShuffleVectorInst::isReverseMask(M, NumElts) || - (isMaskedSlidePair(M, SrcInfo) && isElementRotate(SrcInfo, NumElts)) || + (::isMaskedSlidePair(M, SrcInfo) && + isElementRotate(SrcInfo, NumElts)) || isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 349bc361c90fe..89254940a87f4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -60,9 +60,9 @@ def FPR64IN32X : RegisterOperand { def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>; -def ZdinxExt : ExtInfo<"_INX", "RVZfinx", [HasStdExtZdinx, IsRV64], +def ZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZdinx, IsRV64], f64, FPR64INX, FPR32INX, FPR64INX, ?>; -def Zdinx32Ext : ExtInfo<"_IN32X", "RV32Zdinx", [HasStdExtZdinx, IsRV32], +def Zdinx32Ext : ExtInfo<"_IN32X", "ZdinxRV32GPRPair", [HasStdExtZdinx, IsRV32], f64, FPR64IN32X, FPR32INX, FPR64IN32X, ?>; defvar DExts = [DExt, ZdinxExt, Zdinx32Ext]; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 37ac48db06862..04328151adf8e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -116,7 +116,7 @@ class ExtInfo predicates, def FExt : ExtInfo<"", "", [HasStdExtF], f32, FPR32, FPR32, ?, ?>; -def ZfinxExt : ExtInfo<"_INX", "RVZfinx", [HasStdExtZfinx], f32, FPR32INX, FPR32INX, ?, ?>; +def 
ZfinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZfinx], f32, FPR32INX, FPR32INX, ?, ?>; defvar FExts = [FExt, ZfinxExt]; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index fdb2334b131da..9bd1b0de82757 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -1358,7 +1358,7 @@ defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111>; let Predicates = [HasVInstructionsAnyF] in { // Vector Single-Width Floating-Point Add/Subtract Instructions -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFADD_V : VALU_FV_V_F<"vfadd", 0b000000>; defm VFSUB_V : VALU_FV_V_F<"vfsub", 0b000010>; defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>; @@ -1366,7 +1366,7 @@ defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>; // Vector Widening Floating-Point Add/Subtract Instructions let Constraints = "@earlyclobber $vd", - Uses = [FRM], + Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { let RVVConstraint = WidenV in { @@ -1381,10 +1381,10 @@ let RVVConstraint = WidenW in { defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">; defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">; } // RVVConstraint = WidenW -} // Constraints = "@earlyclobber $vd", Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 +} // Constraints = "@earlyclobber $vd", Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 // Vector Single-Width Floating-Point Multiply/Divide Instructions -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>; defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>; defm VFRDIV_V : VDIV_FV_F<"vfrdiv", 0b100001>; @@ -1392,12 +1392,12 @@ defm VFRDIV_V : VDIV_FV_F<"vfrdiv", 0b100001>; // Vector Widening Floating-Point Multiply let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, - Uses = 
[FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { + Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 +} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 // Vector Single-Width Floating-Point Fused Multiply-Add Instructions -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>; defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>; defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>; @@ -1409,15 +1409,15 @@ defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>; } // Vector Widening Floating-Point Fused Multiply-Add Instructions -let Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>; defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>; defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>; defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 +} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 // Vector Floating-Point Square-Root Instruction -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFSQRT_V : VSQR_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; defm VFREC7_V : VRCP_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; } @@ -1482,13 +1482,13 @@ def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd), // Single-Width Floating-Point/Integer Type-Convert Instructions let 
mayRaiseFPException = true in { -let Uses = [FRM] in { +let Uses = [FRM, VL, VTYPE] in { defm VFCVT_XU_F_V : VCVTI_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>; defm VFCVT_X_F_V : VCVTI_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>; } defm VFCVT_RTZ_XU_F_V : VCVTI_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>; defm VFCVT_RTZ_X_F_V : VCVTI_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>; -let Uses = [FRM] in { +let Uses = [FRM, VL, VTYPE] in { defm VFCVT_F_XU_V : VCVTF_IV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>; defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; } @@ -1497,7 +1497,7 @@ defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; // Widening Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt, mayRaiseFPException = true, DestEEW = EEWSEWx2 in { -let Uses = [FRM] in { +let Uses = [FRM, VL, VTYPE] in { defm VFWCVT_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; defm VFWCVT_X_F_V : VWCVTI_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; } @@ -1510,13 +1510,13 @@ defm VFWCVT_F_F_V : VWCVTF_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; // Narrowing Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", mayRaiseFPException = true in { -let Uses = [FRM] in { +let Uses = [FRM, VL, VTYPE] in { defm VFNCVT_XU_F_W : VNCVTI_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>; defm VFNCVT_X_F_W : VNCVTI_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>; } defm VFNCVT_RTZ_XU_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>; defm VFNCVT_RTZ_X_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>; -let Uses = [FRM] in { +let Uses = [FRM, VL, VTYPE] in { defm VFNCVT_F_XU_W : VNCVTF_IV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>; defm VFNCVT_F_X_W : VNCVTF_IV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>; defm VFNCVT_F_F_W : VNCVTF_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>; @@ -1554,7 +1554,7 @@ defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>; let Predicates = 
[HasVInstructionsAnyF] in { // Vector Single-Width Floating-Point Reduction Instructions let RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask in { -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>; defm VFREDUSUM : VRED_FV_V<"vfredusum", 0b000001>; } @@ -1573,7 +1573,7 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ElementsDep // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>; defm VFWREDUSUM : VWRED_FV_V<"vfwredusum", 0b110001>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 7a79d438fb596..cd13433847c54 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -223,7 +223,7 @@ let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvector", } let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvector", - Uses = [FRM] in { + Uses = [FRM, VL, VTYPE] in { def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">; def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td index 1ee78359bc4a5..e903df4d91933 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td @@ -59,7 +59,7 @@ let Predicates = [HasStdExtZacas], IsSignExtendingOpW = 1 in { defm AMOCAS_W : AMO_cas_aq_rl<0b00101, 0b010, "amocas.w", GPR>; } // Predicates = [HasStdExtZacas] -let Predicates = [HasStdExtZacas, IsRV32], 
DecoderNamespace = "RV32Zacas" in { +let Predicates = [HasStdExtZacas, IsRV32], DecoderNamespace = "RV32GPRPair" in { defm AMOCAS_D_RV32 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPRPairRV32>; } // Predicates = [HasStdExtZacas, IsRV32] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td index b5e3e6a3a8bbf..efed74ca8c870 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td @@ -216,7 +216,7 @@ def C_SH_INX : CStoreH_rri<0b100011, 0b0, "c.sh", GPRF16C>, } // Predicates = [HasStdExtZcb] // Zcmp -let DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp], +let DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { let Defs = [X10, X11] in def CM_MVA01S : RVInst16CA<0b101011, 0b11, 0b10, (outs), @@ -227,9 +227,9 @@ let Uses = [X10, X11] in def CM_MVSA01 : RVInst16CA<0b101011, 0b01, 0b10, (outs SR07:$rs1, SR07:$rs2), (ins), "cm.mvsa01", "$rs1, $rs2">, Sched<[WriteIALU, WriteIALU, ReadIALU, ReadIALU]>; -} // DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp]... +} // DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp]... -let DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp] in { +let DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Uses = [X2], Defs = [X2] in def CM_PUSH : RVInstZcCPPP<0b11000, "cm.push", negstackadj>, Sched<[WriteIALU, ReadIALU, ReadStoreData, ReadStoreData, @@ -258,9 +258,9 @@ def CM_POP : RVInstZcCPPP<0b11010, "cm.pop">, Sched<[WriteIALU, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, ReadIALU]>; -} // DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp]... +} // DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp]... 
-let DecoderNamespace = "RVZcmt", Predicates = [HasStdExtZcmt], +let DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmt], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { def CM_JT : RVInst16CJ<0b101, 0b10, (outs), (ins uimm5:$index), "cm.jt", "$index">{ @@ -278,7 +278,7 @@ def CM_JALT : RVInst16CJ<0b101, 0b10, (outs), (ins uimm8ge32:$index), let Inst{12-10} = 0b000; let Inst{9-2} = index; } -} // DecoderNamespace = "RVZcmt", Predicates = [HasStdExtZcmt]... +} // DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmt]... let Predicates = [HasStdExtZcb, HasStdExtZmmul] in{ diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 625011c3b9f7c..ea0b814ac7ba5 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -52,22 +52,22 @@ def ZfhDExt : ExtInfo<"", "", [HasStdExtZfh, HasStdExtD], def ZfhminDExt : ExtInfo<"", "", [HasStdExtZfhmin, HasStdExtD], ?, ?, FPR32, FPR64, FPR16>; -def ZhinxExt : ExtInfo<"_INX", "RVZfinx", +def ZhinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinx], f16, FPR16INX, FPR32INX, ?, FPR16INX>; -def ZhinxminExt : ExtInfo<"_INX", "RVZfinx", +def ZhinxminExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinxmin], f16, FPR16INX, FPR32INX, ?, FPR16INX>; -def ZhinxZdinxExt : ExtInfo<"_INX", "RVZfinx", +def ZhinxZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinx, HasStdExtZdinx, IsRV64], ?, ?, FPR32INX, FPR64INX, FPR16INX>; -def ZhinxminZdinxExt : ExtInfo<"_INX", "RVZfinx", +def ZhinxminZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinxmin, HasStdExtZdinx, IsRV64], ?, ?, FPR32INX, FPR64INX, FPR16INX>; -def ZhinxZdinx32Ext : ExtInfo<"_IN32X", "RV32Zdinx", +def ZhinxZdinx32Ext : ExtInfo<"_IN32X", "ZdinxGPRPairRV32", [HasStdExtZhinx, HasStdExtZdinx, IsRV32], ?, ?, FPR32INX, FPR64IN32X, FPR16INX >; -def ZhinxminZdinx32Ext : ExtInfo<"_IN32X", "RV32Zdinx", +def ZhinxminZdinx32Ext : ExtInfo<"_IN32X", "ZdinxGPRPairRV32", [HasStdExtZhinxmin, 
HasStdExtZdinx, IsRV32], ?, ?, FPR32INX, FPR64IN32X, FPR16INX>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td index a79f757753325..cbeec9ba75f16 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td @@ -21,13 +21,13 @@ let Predicates = [HasStdExtZvfbfmin], Constraints = "@earlyclobber $vd", mayRaiseFPException = true in { let RVVConstraint = WidenCvt, DestEEW = EEWSEWx2 in defm VFWCVTBF16_F_F_V : VWCVTF_FV_VS2<"vfwcvtbf16.f.f.v", 0b010010, 0b01101>; -let Uses = [FRM] in +let Uses = [FRM, VL, VTYPE] in defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>; } let Predicates = [HasStdExtZvfbfwma], Constraints = "@earlyclobber $vd_wb, $vd = $vd_wb", - RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true, + RVVConstraint = WidenV, Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { defm VFWMACCBF16_V : VWMAC_FV_V_F<"vfwmaccbf16", 0b111011>; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index d19023b19ccdd..6005c067428eb 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -475,6 +475,64 @@ costShuffleViaVRegSplitting(RISCVTTIImpl &TTI, MVT LegalVT, return InstructionCost::getInvalid(); } +InstructionCost RISCVTTIImpl::getSlideCost(FixedVectorType *Tp, + ArrayRef Mask, + TTI::TargetCostKind CostKind) { + // Avoid missing masks and length changing shuffles + if (Mask.size() <= 2 || Mask.size() != Tp->getNumElements()) + return InstructionCost::getInvalid(); + + int NumElts = Tp->getNumElements(); + std::pair LT = getTypeLegalizationCost(Tp); + // Avoid scalarization cases + if (!LT.second.isFixedLengthVector()) + return InstructionCost::getInvalid(); + + // Requires moving elements between parts, which requires additional + // unmodeled instructions. 
+ if (LT.first != 1) + return InstructionCost::getInvalid(); + + auto GetSlideOpcode = [&](int SlideAmt) { + assert(SlideAmt != 0); + bool IsVI = isUInt<5>(std::abs(SlideAmt)); + if (SlideAmt < 0) + return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX; + return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX; + }; + + std::array, 2> SrcInfo; + if (!isMaskedSlidePair(Mask, NumElts, SrcInfo)) + return InstructionCost::getInvalid(); + + if (SrcInfo[1].second == 0) + std::swap(SrcInfo[0], SrcInfo[1]); + + InstructionCost FirstSlideCost = 0; + if (SrcInfo[0].second != 0) { + unsigned Opcode = GetSlideOpcode(SrcInfo[0].second); + FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind); + } + + if (SrcInfo[1].first == -1) + return FirstSlideCost; + + InstructionCost SecondSlideCost = 0; + if (SrcInfo[1].second != 0) { + unsigned Opcode = GetSlideOpcode(SrcInfo[1].second); + SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind); + } else { + SecondSlideCost = + getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind); + } + + auto EC = Tp->getElementCount(); + VectorType *MaskTy = + VectorType::get(IntegerType::getInt1Ty(Tp->getContext()), EC); + InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); + return FirstSlideCost + SecondSlideCost + MaskCost; +} + InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, TTI::TargetCostKind CostKind, @@ -482,14 +540,13 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, ArrayRef Args, const Instruction *CxtI) { Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp); - std::pair LT = getTypeLegalizationCost(Tp); // First, handle cases where having a fixed length vector enables us to // give a more accurate cost than falling back to generic scalable codegen. // TODO: Each of these cases hints at a modeling gap around scalable vectors. 
- if (ST->hasVInstructions() && isa(Tp) && - LT.second.isFixedLengthVector()) { + if (auto *FVTp = dyn_cast(Tp); + FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) { InstructionCost VRegSplittingCost = costShuffleViaVRegSplitting( *this, LT.second, ST->getRealVLen(), Tp, Mask, CostKind); if (VRegSplittingCost.isValid()) @@ -545,6 +602,11 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return Cost; } } + + if (InstructionCost SlideCost = getSlideCost(FVTp, Mask, CostKind); + SlideCost.isValid()) + return SlideCost; + // vrgather + cost of generating the mask constant. // We model this for an unknown mask with a single vrgather. if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 || @@ -559,6 +621,11 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, } case TTI::SK_Transpose: case TTI::SK_PermuteTwoSrc: { + + if (InstructionCost SlideCost = getSlideCost(FVTp, Mask, CostKind); + SlideCost.isValid()) + return SlideCost; + // 2 x (vrgather + cost of generating the mask constant) + cost of mask // register for the second vrgather. We model this for an unknown // (shuffle) mask. 
@@ -890,11 +957,12 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost( if (Opcode == Instruction::Load) { InstructionCost Cost = MemCost; for (unsigned Index : Indices) { - FixedVectorType *SubVecTy = + FixedVectorType *VecTy = FixedVectorType::get(FVTy->getElementType(), VF * Factor); auto Mask = createStrideMask(Index, Factor, VF); + Mask.resize(VF * Factor, -1); InstructionCost ShuffleCost = - getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, SubVecTy, Mask, + getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, VecTy, Mask, CostKind, 0, nullptr, {}); Cost += ShuffleCost; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 134a7333b9b06..3f57560d3c127 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -63,6 +63,12 @@ class RISCVTTIImpl : public BasicTTIImplBase { /// type. InstructionCost getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind); + + /// If this shuffle can be lowered as a masked slide pair (at worst), + /// return a cost for it. 
+ InstructionCost getSlideCost(FixedVectorType *Tp, ArrayRef Mask, + TTI::TargetCostKind CostKind); + public: explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F) : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index b2c12411ab782..c013e122a85dc 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -319,7 +319,6 @@ void SPIRVModuleAnalysis::visitDecl( std::map &GlobalToGReg, const MachineFunction *MF, const MachineInstr &MI) { unsigned Opcode = MI.getOpcode(); - DenseSet Deps; // Process each operand of the instruction to resolve dependencies for (const MachineOperand &MO : MI.operands()) { diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp index cf3073f0f2090..4b16bcf95d51c 100644 --- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp +++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -101,8 +101,10 @@ void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB, unsigned LowOpcode, unsigned HighOpcode) { Register DestReg = MBBI->getOperand(0).getReg(); - Register Src1Reg = MBBI->getOperand(1).getReg(); - Register Src2Reg = MBBI->getOperand(2).getReg(); + MachineOperand &Src1MO = MBBI->getOperand(1); + MachineOperand &Src2MO = MBBI->getOperand(2); + Register Src1Reg = Src1MO.getReg(); + Register Src2Reg = Src2MO.getReg(); bool DestIsHigh = SystemZ::isHighReg(DestReg); bool Src1IsHigh = SystemZ::isHighReg(Src1Reg); bool Src2IsHigh = SystemZ::isHighReg(Src2Reg); @@ -114,7 +116,7 @@ void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB, if (Src1Reg == Src2Reg) { BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), TII->get(SystemZ::COPY), DestReg) - .addReg(MBBI->getOperand(1).getReg(), getRegState(MBBI->getOperand(1))); + .addReg(Src1Reg, getRegState(Src1MO) & getRegState(Src2MO)); 
MBBI->eraseFromParent(); return; } @@ -126,15 +128,15 @@ void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB, if (DestIsHigh != Src1IsHigh) { BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), TII->get(SystemZ::COPY), DestReg) - .addReg(MBBI->getOperand(1).getReg(), getRegState(MBBI->getOperand(1))); - MBBI->getOperand(1).setReg(DestReg); + .addReg(Src1Reg, getRegState(Src1MO)); + Src1MO.setReg(DestReg); Src1Reg = DestReg; Src1IsHigh = DestIsHigh; } else if (DestIsHigh != Src2IsHigh) { BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), TII->get(SystemZ::COPY), DestReg) - .addReg(MBBI->getOperand(2).getReg(), getRegState(MBBI->getOperand(2))); - MBBI->getOperand(2).setReg(DestReg); + .addReg(Src2Reg, getRegState(Src2MO)); + Src2MO.setReg(DestReg); Src2Reg = DestReg; Src2IsHigh = DestIsHigh; } diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 2b94832939419..06a0a3a631654 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -887,7 +887,8 @@ InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, unsigned SrcScalarBits = Src->getScalarSizeInBits(); if (!Src->isVectorTy()) { - assert (!Dst->isVectorTy()); + if (Dst->isVectorTy()) + return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) { if (Src->isIntegerTy(128)) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index a1e1a51b201b0..48725a5505514 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1478,7 +1478,8 @@ bool Attributor::getAssumedSimplifiedValues( // AAPotentialValues. 
const auto *PotentialValuesAA = getOrCreateAAFor(IRP, AA, DepClassTy::OPTIONAL); - if (PotentialValuesAA && PotentialValuesAA->getAssumedSimplifiedValues(*this, Values, S)) { + if (PotentialValuesAA && + PotentialValuesAA->getAssumedSimplifiedValues(*this, Values, S)) { UsedAssumedInformation |= !PotentialValuesAA->isAtFixpoint(); } else if (IRP.getPositionKind() != IRPosition::IRP_RETURNED) { Values.push_back({IRP.getAssociatedValue(), IRP.getCtxI()}); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index c1dd8bc393f33..60aa758917b0b 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -129,10 +129,7 @@ STATISTIC(NumIndirectCallsPromoted, "Number of indirect calls promoted"); STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG); #define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE)); #define STATS_DECLTRACK(NAME, TYPE, MSG) \ - { \ - STATS_DECL(NAME, TYPE, MSG) \ - STATS_TRACK(NAME, TYPE) \ - } + {STATS_DECL(NAME, TYPE, MSG) STATS_TRACK(NAME, TYPE)} #define STATS_DECLTRACK_ARG_ATTR(NAME) \ STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME)) #define STATS_DECLTRACK_CSARG_ATTR(NAME) \ @@ -2420,7 +2417,7 @@ struct AANoFreeCallSiteArgument final : AANoFreeFloating { } /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nofree)}; + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nofree) }; }; /// NoFree attribute for function return value. @@ -6078,7 +6075,9 @@ struct AANoCaptureCallSiteArgument final : AANoCaptureImpl { } /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nocapture)}; + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(nocapture) + }; }; /// NoCapture attribute for floating values. 
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index f6e211c302230..5c17b9e8d386d 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1249,7 +1249,7 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes, // Functions that are readonly (or readnone) and nounwind and don't return // a value can't capture arguments. Don't analyze them. - if (F->onlyReadsMemory() && F->doesNotThrow() && + if (F->onlyReadsMemory() && F->doesNotThrow() && F->willReturn() && F->getReturnType()->isVoidTy()) { for (Argument &A : F->args()) { if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) { diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8d8d56035a48f..85edcb1276efe 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -797,6 +797,7 @@ struct AddressSanitizer { bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument, uint32_t Exp, RuntimeCallInserter &RTCI); + bool maybeIgnoreMemIntrinsic(MemIntrinsic *MI, const Triple &TargetTriple); void instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool suppressInstrumentationSiteForDebug(int &Instrumented); @@ -1340,10 +1341,21 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { return IRB.CreateAdd(Shadow, ShadowBase); } +bool AddressSanitizer::maybeIgnoreMemIntrinsic(MemIntrinsic *MI, + const Triple &TargetTriple) { + // Ignore FS and GS registers to prevent miscompilation + if (MI->getDestAddressSpace() >= 256 && + TargetTriple.getArch() == Triple::x86_64) + return true; + return false; +} + // Instrument memset/memmove/memcpy void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI) { InstrumentationIRBuilder IRB(MI); + if 
(maybeIgnoreMemIntrinsic(MI, TargetTriple)) + return; if (isa(MI)) { RTCI.createRuntimeCall( IRB, isa(MI) ? AsanMemmove : AsanMemcpy, diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 7b221a814aabd..9cae65bbdcfbc 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -307,12 +307,11 @@ bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_, else BBDupThreshold = DefaultBBDupThreshold; - // JumpThreading must not processes blocks unreachable from entry. It's a - // waste of compute time and can potentially lead to hangs. - SmallPtrSet Unreachable; assert(DTU && "DTU isn't passed into JumpThreading before using it."); assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed."); DominatorTree &DT = DTU->getDomTree(); + + Unreachable.clear(); for (auto &BB : *F) if (!DT.isReachableFromEntry(&BB)) Unreachable.insert(&BB); @@ -1895,6 +1894,11 @@ bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) { SinglePred == BB || hasAddressTakenAndUsed(BB)) return false; + // MergeBasicBlockIntoOnlyPred may delete SinglePred, we need to avoid + // deleting a BB pointer from Unreachable. + if (Unreachable.count(SinglePred)) + return false; + // If SinglePred was a loop header, BB becomes one. 
if (LoopHeaders.erase(SinglePred)) LoopHeaders.insert(BB); diff --git a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp index 5ba626fa213ad..0155a7ba2570b 100644 --- a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp +++ b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp @@ -270,7 +270,7 @@ static void reconnectPhis(BasicBlock *Out, BasicBlock *GuardBlock, } } -BasicBlock *ControlFlowHub::finalize( +std::pair ControlFlowHub::finalize( DomTreeUpdater *DTU, SmallVectorImpl &GuardBlocks, const StringRef Prefix, std::optional MaxControlFlowBooleans) { #ifndef NDEBUG @@ -289,7 +289,7 @@ BasicBlock *ControlFlowHub::finalize( } if (Outgoing.size() < 2) - return Outgoing.front(); + return {Outgoing.front(), false}; SmallVector Updates; if (DTU) { @@ -338,5 +338,5 @@ BasicBlock *ControlFlowHub::finalize( Inst->eraseFromParent(); } - return FirstGuardBlock; + return {FirstGuardBlock, true}; } diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 856f3c3ed3e13..9f338dbc78cff 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -169,8 +169,12 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { SmallVector GuardBlocks; DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - BasicBlock *LoopExitBlock = CHub.finalize( + BasicBlock *LoopExitBlock; + bool ChangedCFG; + std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize( &DTU, GuardBlocks, "loop.exit", MaxBooleansInControlFlowHub.getValue()); + if (!ChangedCFG) + return false; restoreSSA(DT, L, ExitingBlocks, LoopExitBlock); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp index 5837cc16fcbac..bffb9f187e882 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp +++ 
b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp @@ -8,9 +8,11 @@ #include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Module.h" #include "llvm/SandboxIR/Constant.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.h" +#include using namespace llvm; @@ -29,6 +31,22 @@ static cl::opt UserDefinedPassPipeline( cl::desc("Comma-separated list of vectorizer passes. If not set " "we run the predefined pipeline.")); +// This option is useful for bisection debugging. +// For example you may use it to figure out which filename is the one causing a +// miscompile. You can specify a regex for the filename like: "/[a-m][^/]*" +// which will enable any file name starting with 'a' to 'm' and disable the +// rest. If the miscompile goes away, then we try "/[n-z][^/]*" for the other +// half of the range, from 'n' to 'z'. If we can reproduce the miscompile then +// we can keep looking in [n-r] and [s-z] and so on, in a binary-search fashion. +// +// Please note that we are using [^/]* and not .* to make sure that we are +// matching the actual filename and not some other directory in the path. +cl::opt AllowFiles( + "sbvec-allow-files", cl::init(".*"), cl::Hidden, + cl::desc("Run the vectorizer only on file paths that match any in the " + "list of comma-separated regex's.")); +static constexpr const char AllowFilesDelim = ','; + SandboxVectorizerPass::SandboxVectorizerPass() : FPM("fpm") { if (UserDefinedPassPipeline == DefaultPipelineMagicStr) { // TODO: Add passes to the default pipeline. It currently contains: @@ -66,6 +84,23 @@ PreservedAnalyses SandboxVectorizerPass::run(Function &F, return PA; } +bool SandboxVectorizerPass::allowFile(const std::string &SrcFilePath) { + // Iterate over all files in AllowFiles separated by `AllowFilesDelim`. 
+ size_t DelimPos = 0; + do { + size_t LastPos = DelimPos != 0 ? DelimPos + 1 : DelimPos; + DelimPos = AllowFiles.find(AllowFilesDelim, LastPos); + auto FileNameToMatch = AllowFiles.substr(LastPos, DelimPos - LastPos); + if (FileNameToMatch.empty()) + return false; + // Note: This only runs when debugging so its OK not to reuse the regex. + std::regex FileNameRegex(std::string(".*") + FileNameToMatch); + if (std::regex_match(SrcFilePath, FileNameRegex)) + return true; + } while (DelimPos != std::string::npos); + return false; +} + bool SandboxVectorizerPass::runImpl(Function &LLVMF) { if (Ctx == nullptr) Ctx = std::make_unique(LLVMF.getContext()); @@ -75,6 +110,13 @@ bool SandboxVectorizerPass::runImpl(Function &LLVMF) { return false; } + // This is used for debugging. + if (LLVM_UNLIKELY(AllowFiles != ".*")) { + const auto &SrcFilePath = LLVMF.getParent()->getSourceFileName(); + if (!allowFile(SrcFilePath)) + return false; + } + // If the target claims to have no vector registers early return. if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true))) { LLVM_DEBUG(dbgs() << "SBVec: Target has no vector registers, return.\n"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index a058b2a121d59..0a59b137bbd79 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -175,12 +175,7 @@ class VPValue { /// Returns the underlying IR value, if this VPValue is defined outside the /// scope of VPlan. Returns nullptr if the VPValue is defined by a VPDef /// inside a VPlan. 
- Value *getLiveInIRValue() { - assert(isLiveIn() && - "VPValue is not a live-in; it is defined by a VPDef inside a VPlan"); - return getUnderlyingValue(); - } - const Value *getLiveInIRValue() const { + Value *getLiveInIRValue() const { assert(isLiveIn() && "VPValue is not a live-in; it is defined by a VPDef inside a VPlan"); return getUnderlyingValue(); diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll index c951184a31731..06c709e4cc879 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll @@ -186,7 +186,7 @@ define void @insert_subvec() vscale_range(2,2) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_1 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_3 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'insert_subvec' @@ -225,7 +225,7 @@ define void @insert_subvec() vscale_range(2,2) { ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_1 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %v16i32_4_3 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v4i8_2_0 = shufflevector <4 x i8> poison, <4 x i8> poison, <4 x i32> @@ -737,8 +737,8 @@ define void @multipart() vscale_range(2,2) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32idrev = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 47 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> @@ -757,8 +757,8 @@ define void @multipart() vscale_range(2,2) { ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32idrev = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll index e8dd30345cc76..d2bfb61a11b00 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll @@ 
-19,7 +19,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; VLEN128-LABEL: 'test_vXf64' diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll index 8f784a07d3124..ef069fee8526e 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll @@ -10,11 +10,11 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: 'trn1.v8i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %tmp0 ; ; SIZE-LABEL: 'trn1.v8i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %tmp0 ; %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> @@ -23,11 +23,11 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: 'trn2.v8i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %tmp0 ; ; SIZE-LABEL: 'trn2.v8i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %tmp0 ; %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> @@ -36,11 +36,11 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: 'trn1.v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %tmp0 ; ; SIZE-LABEL: 'trn1.v16i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, 
<16 x i8> %v1, <16 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %tmp0 ; %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> @@ -49,11 +49,11 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: 'trn2.v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %tmp0 ; ; SIZE-LABEL: 'trn2.v16i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %tmp0 ; %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> @@ -62,11 +62,11 @@ define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: 'trn1.v4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %tmp0 ; ; SIZE-LABEL: 'trn1.v4i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x 
i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %tmp0 ; %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> @@ -75,11 +75,11 @@ define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: 'trn2.v4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %tmp0 ; ; SIZE-LABEL: 'trn2.v4i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %tmp0 ; %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> @@ -88,11 +88,11 @@ define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: 'trn1.v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %tmp0 ; ; SIZE-LABEL: 'trn1.v8i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %tmp0 ; %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> @@ -101,11 +101,11 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: 'trn2.v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %tmp0 ; ; SIZE-LABEL: 'trn2.v8i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %tmp0 ; %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> @@ -140,11 +140,11 @@ define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) { define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: 'trn1.v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %tmp0 ; ; SIZE-LABEL: 'trn1.v4i32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret <4 x i32> %tmp0 ; %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> @@ -153,11 +153,11 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: 'trn2.v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %tmp0 ; ; SIZE-LABEL: 'trn2.v4i32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %tmp0 ; %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> @@ -218,11 +218,11 @@ define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) { define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: 'trn1.v4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %tmp0 ; ; SIZE-LABEL: 'trn1.v4f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret <4 x float> %tmp0 ; %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> @@ -231,11 +231,11 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: 'trn2.v4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %tmp0 ; ; SIZE-LABEL: 'trn2.v4f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %tmp0 ; %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> @@ -270,11 +270,11 @@ define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) { define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: 'trn1.v4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %tmp0 ; ; SIZE-LABEL: 'trn1.v4f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> 
%v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x half> %tmp0 ; %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> @@ -283,11 +283,11 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: 'trn2.v4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %tmp0 ; ; SIZE-LABEL: 'trn2.v4f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x half> %tmp0 ; %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> @@ -296,11 +296,11 @@ define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: 'trn1.v8f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x half> %tmp0 ; ; SIZE-LABEL: 'trn1.v8f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> 
%v0, <8 x half> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x half> %tmp0 ; %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> @@ -309,11 +309,11 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: 'trn2.v8f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x half> %tmp0 ; ; SIZE-LABEL: 'trn2.v8f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x half> %tmp0 ; %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/SystemZ/bitcast.ll b/llvm/test/Analysis/CostModel/SystemZ/bitcast.ll new file mode 100644 index 0000000000000..7927588623c52 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/bitcast.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -mtriple=systemz-unknown -mcpu=z15 -passes="print" \ +; RUN: -disable-output 2>&1 | FileCheck %s + +; Check bitcast from scalar to vector. 
+ +@Glob = dso_local local_unnamed_addr global i32 0, align 4 + +define dso_local void @fun() { +entry: + %d.sroa.0 = alloca i64, align 8 + store i64 0, ptr %d.sroa.0, align 8 + store i32 2, ptr @Glob, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.cond1, %entry + %L = load i64, ptr %d.sroa.0, align 8 + %A0 = and i64 %L, 4294967295 + store i64 %A0, ptr %d.sroa.0, align 8 + %BC = bitcast i64 %A0 to <2 x i32> + %0 = and <2 x i32> %BC, splat (i32 10) + store <2 x i32> %0, ptr %d.sroa.0, align 8 + br label %for.cond1 + +; CHECK: Printing analysis 'Cost Model Analysis' for function 'fun': +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d.sroa.0 = alloca i64, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 0, ptr %d.sroa.0, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 2, ptr @Glob, align 4 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %for.cond1 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %L = load i64, ptr %d.sroa.0, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = and i64 %L, 4294967295 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %A0, ptr %d.sroa.0, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BC = bitcast i64 %A0 to <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %0 = and <2 x i32> %BC, splat (i32 10) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %0, ptr %d.sroa.0, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %for.cond1 +} diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll index e5443038cb4c3..9c5f0640b1677 100644 --- 
a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -10,13 +10,13 @@ ; code path which isn't ever executed. ; CHECK: define void @test0_yes(ptr captures(none) %p) #0 { -define void @test0_yes(ptr %p) nounwind { +define void @test0_yes(ptr %p) nounwind willreturn { store i32 0, ptr %p, !tbaa !1 ret void } ; CHECK: define void @test0_no(ptr writeonly captures(none) initializes((0, 4)) %p) #1 { -define void @test0_no(ptr %p) nounwind { +define void @test0_no(ptr %p) nounwind willreturn { store i32 0, ptr %p, !tbaa !2 ret void } @@ -25,13 +25,13 @@ define void @test0_no(ptr %p) nounwind { ; TBAA says only accesses constant memory. ; CHECK: define void @test1_yes(ptr captures(none) %p) #2 { -define void @test1_yes(ptr %p) nounwind { +define void @test1_yes(ptr %p) nounwind willreturn { call void @callee(ptr %p), !tbaa !1 ret void } ; CHECK: define void @test1_no(ptr %p) #3 { -define void @test1_no(ptr %p) nounwind { +define void @test1_no(ptr %p) nounwind willreturn { call void @callee(ptr %p), !tbaa !2 ret void } @@ -44,13 +44,13 @@ define void @test1_no(ptr %p) nounwind { ; isn't necessarily invalid. ; CHECK: define void @test2_yes(ptr captures(none) %p, ptr captures(none) %q, i64 %n) #0 { -define void @test2_yes(ptr %p, ptr %q, i64 %n) nounwind { +define void @test2_yes(ptr %p, ptr %q, i64 %n) nounwind willreturn { call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %q, i64 %n, i1 false), !tbaa !1 ret void } ; CHECK: define void @test2_no(ptr writeonly captures(none) %p, ptr readonly captures(none) %q, i64 %n) #4 { -define void @test2_no(ptr %p, ptr %q, i64 %n) nounwind { +define void @test2_no(ptr %p, ptr %q, i64 %n) nounwind willreturn { call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %q, i64 %n, i1 false), !tbaa !2 ret void } @@ -58,26 +58,27 @@ define void @test2_no(ptr %p, ptr %q, i64 %n) nounwind { ; Similar to the others, va_arg only accesses memory through its operand. 
; CHECK: define i32 @test3_yes(ptr captures(none) %p) #0 { -define i32 @test3_yes(ptr %p) nounwind { +define i32 @test3_yes(ptr %p) nounwind willreturn { %t = va_arg ptr %p, i32, !tbaa !1 ret i32 %t } ; CHECK: define i32 @test3_no(ptr captures(none) %p) #4 { -define i32 @test3_no(ptr %p) nounwind { +define i32 @test3_no(ptr %p) nounwind willreturn { %t = va_arg ptr %p, i32, !tbaa !2 ret i32 %t } -declare void @callee(ptr %p) nounwind -declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) nounwind +declare void @callee(ptr %p) nounwind willreturn +declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) nounwind willreturn ; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } ; CHECK: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) } -; CHECK: attributes #2 = { nofree nosync nounwind memory(none) } -; CHECK: attributes #3 = { nounwind } +; CHECK: attributes #2 = { mustprogress nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #3 = { mustprogress nounwind willreturn } ; CHECK: attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } -; CHECK: attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #5 = { nounwind willreturn } +; CHECK: attributes #6 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; Root note. 
!0 = !{ } diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll index b1270165556e6..e0ea173cd28e4 100644 --- a/llvm/test/CodeGen/AArch64/sve-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-select.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s -define @select_nxv1i8(i1 %cond, %a, %b) { +define @select_nxv1i8(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv1i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -9,11 +9,11 @@ define @select_nxv1i8(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv16i8(i1 %cond, %a, %b) { +define @select_nxv16i8(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -21,11 +21,11 @@ define @select_nxv16i8(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv1i16(i1 %cond, %a, %b) { +define @select_nxv1i16(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv1i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -33,11 +33,11 @@ define @select_nxv1i16(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv8i16(i1 %cond, %a, %b) { +define @select_nxv8i16(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -45,11 +45,11 @@ define @select_nxv8i16(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv1i32(i1 %cond, %a, %b) { +define @select_nxv1i32(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv1i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -57,11 +57,11 @@ define @select_nxv1i32(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv4i32(i1 %cond, 
%a, %b) { +define @select_nxv4i32(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -69,11 +69,11 @@ define @select_nxv4i32(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv1i64(i1 %cond, %a, %b) { +define @select_nxv1i64(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv1i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -81,11 +81,11 @@ define @select_nxv1i64(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv2i64(i1 %cond, %a, %b) { +define @select_nxv2i64(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -93,11 +93,11 @@ define @select_nxv2i64(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv8f16(i1 %cond, %a, %b) { +define @select_nxv8f16(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -105,11 +105,35 @@ define @select_nxv8f16(i1 %cond, %a, < ; CHECK-NEXT: whilelo p0.h, xzr, x8 ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: ret - %res = select i1 %cond, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv4f32(i1 %cond, %a, %b) { +define @select_nxv4f16(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.s, xzr, x8 +; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv2f16(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.d, xzr, x8 +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ret + %res = select i1 %cond, 
%a, %b + ret %res +} + +define @select_nxv4f32(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -117,11 +141,23 @@ define @select_nxv4f32(i1 %cond, %a, ; CHECK-NEXT: whilelo p0.s, xzr, x8 ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: ret - %res = select i1 %cond, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv2f32(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.d, xzr, x8 +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv2f64(i1 %cond, %a, %b) { +define @select_nxv2f64(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -129,11 +165,47 @@ define @select_nxv2f64(i1 %cond, % ; CHECK-NEXT: whilelo p0.d, xzr, x8 ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: ret - %res = select i1 %cond, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv8bf16(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.h, xzr, x8 +; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv16i1(i1 %cond, %a, %b) { +define @select_nxv4bf16(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.s, xzr, x8 +; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv2bf16(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 
killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.d, xzr, x8 +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv16i1(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -141,11 +213,11 @@ define @select_nxv16i1(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv8i1(i1 %cond, %a, %b) { +define @select_nxv8i1(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -153,11 +225,11 @@ define @select_nxv8i1(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv4i1(i1 %cond, %a, %b) { +define @select_nxv4i1(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -165,11 +237,11 @@ define @select_nxv4i1(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv2i1(i1 %cond, %a, %b) { +define @select_nxv2i1(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -177,11 +249,11 @@ define @select_nxv2i1(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv1i1(i1 %cond, %a, %b) { +define @select_nxv1i1(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv1i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -190,8 +262,8 @@ define @select_nxv1i1(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } ; Integer vector select @@ -324,6 +396,20 @@ define @icmp_select_nxv2f64( %a, %sel } +define @icmp_select_nxv2bf16( %a, %b, i64 %x0) { +; CHECK-LABEL: icmp_select_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #0 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: sbfx x8, x8, #0, #1 +; CHECK-NEXT: whilelo 
p0.d, xzr, x8 +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ret + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel +} + define @icmp_select_nxv4f16( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv4f16: ; CHECK: // %bb.0: @@ -352,6 +438,20 @@ define @icmp_select_nxv4f32( %a, %sel } +define @icmp_select_nxv4bf16( %a, %b, i64 %x0) { +; CHECK-LABEL: icmp_select_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #0 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: sbfx x8, x8, #0, #1 +; CHECK-NEXT: whilelo p0.s, xzr, x8 +; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s +; CHECK-NEXT: ret + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel +} + define @icmp_select_nxv8f16( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv8f16: ; CHECK: // %bb.0: @@ -366,6 +466,20 @@ define @icmp_select_nxv8f16( %a, %sel } +define @icmp_select_nxv8bf16( %a, %b, i64 %x0) { +; CHECK-LABEL: icmp_select_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #0 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: sbfx x8, x8, #0, #1 +; CHECK-NEXT: whilelo p0.h, xzr, x8 +; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h +; CHECK-NEXT: ret + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel +} + define @icmp_select_nxv1i64( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv1i64: ; CHECK: // %bb.0: @@ -488,9 +602,9 @@ define @icmp_select_nxv1i1( %a, %a, %b - ret %sel + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel } define @icmp_select_nxv2i1( %a, %b, i64 %x0) { @@ -502,9 +616,9 @@ define @icmp_select_nxv2i1( %a, %a, %b - ret %sel + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel } define @icmp_select_nxv4i1( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv4i1: @@ -515,9 +629,9 @@ define @icmp_select_nxv4i1( %a, %a, %b - ret %sel + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel } define @icmp_select_nxv8i1( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv8i1: @@ -528,9 +642,9 @@ define 
@icmp_select_nxv8i1( %a, %a, %b - ret %sel + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel } define @icmp_select_nxv16i1( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv16i1: @@ -541,9 +655,9 @@ define @icmp_select_nxv16i1( %a, %a, %b - ret %sel + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel } define @select_f32_invert_fmul( %a, %b) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll index 91e16d91ddd15..a66c21feb1cbc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll @@ -298,7 +298,7 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: v_mov_b32_e32 v4, s5 +; GFX10-NEXT: v_mov_b32_e32 v5, s5 ; GFX10-NEXT: s_branch .LBB4_2 ; GFX10-NEXT: .LBB4_1: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1 @@ -312,6 +312,7 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: s_cbranch_execz .LBB4_6 ; GFX10-NEXT: .LBB4_2: ; %cond.block.0 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX10-NEXT: v_mov_b32_e32 v4, v5 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4 ; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB4_4 @@ -328,11 +329,12 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, v1, v4 ; GFX10-NEXT: s_mov_b32 s7, -1 +; GFX10-NEXT: ; implicit-def: $vgpr5 ; GFX10-NEXT: s_and_saveexec_b32 s8, s4 ; GFX10-NEXT: s_cbranch_execz .LBB4_1 ; GFX10-NEXT: ; %bb.5: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1 
-; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v4 +; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v4 ; GFX10-NEXT: s_andn2_b32 s4, -1, exec_lo ; GFX10-NEXT: s_and_b32 s7, exec_lo, 0 ; GFX10-NEXT: s_or_b32 s7, s4, s7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll index 53f6c9543c3e3..074272f7bed86 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll @@ -5,7 +5,6 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX10_W64 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX11_W32 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX11_W64 %s -; REQUIRES: do-not-run-me define float @v_div_fmas_f32(float %a, float %b, float %c, i1 %d) { ; GFX7-LABEL: v_div_fmas_f32: @@ -291,14 +290,14 @@ define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 -; GFX7-NEXT: s_load_dword s6, s[2:3], 0x1c -; GFX7-NEXT: s_load_dword s7, s[2:3], 0x25 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0xa +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x13 +; GFX7-NEXT: s_load_dword s6, s[4:5], 0x1c +; GFX7-NEXT: s_load_dword s7, s[4:5], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 
v1, s3 ; GFX7-NEXT: v_mov_b32_e32 v2, s6 ; GFX7-NEXT: s_and_b32 s2, 1, s7 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 @@ -311,17 +310,17 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f ; ; GFX8-LABEL: test_div_fmas_f32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x70 -; GFX8-NEXT: s_load_dword s5, s[2:3], 0x94 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x28 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x4c +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x70 +; GFX8-NEXT: s_load_dword s3, s[4:5], 0x94 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: s_and_b32 s0, 1, s5 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: s_and_b32 s0, 1, s3 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_nop 2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -333,14 +332,14 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f ; GFX10_W32-LABEL: test_div_fmas_f32: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x4 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x4c -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x70 -; GFX10_W32-NEXT: s_load_dword s7, s[2:3], 0x28 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x4c +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x70 +; GFX10_W32-NEXT: s_load_dword s7, s[4:5], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3 ; 
GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s7, v0, v1 @@ -351,14 +350,14 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f ; GFX10_W64-LABEL: test_div_fmas_f32: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x4 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x4c -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x70 -; GFX10_W64-NEXT: s_load_dword s7, s[2:3], 0x28 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x4c +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x70 +; GFX10_W64-NEXT: s_load_dword s7, s[4:5], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s7, v0, v1 @@ -369,40 +368,36 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f ; GFX11_W32-LABEL: test_div_fmas_f32: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x4 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x4c -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x70 -; GFX11_W32-NEXT: s_load_b32 s7, s[2:3], 0x28 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x4c +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x70 +; GFX11_W32-NEXT: s_load_b32 s7, s[4:5], 0x28 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W32-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11_W32-NEXT: 
s_and_b32 s2, 1, s2 +; GFX11_W32-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s6 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s7, v0, v1 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x4 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x4c -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x70 -; GFX11_W64-NEXT: s_load_b32 s7, s[2:3], 0x28 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x4c +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x70 +; GFX11_W64-NEXT: s_load_b32 s7, s[4:5], 0x28 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s6 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s7, v0, v1 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) store float %result, ptr addrspace(1) %out, align 4 @@ -412,33 +407,33 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0x13 -; 
GFX7-NEXT: s_load_dword s5, s[2:3], 0x1c -; GFX7-NEXT: s_load_dword s6, s[2:3], 0x25 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x13 +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x1c +; GFX7-NEXT: s_load_dword s6, s[4:5], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: s_and_b32 s2, 1, s6 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_nop 2 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_nop 1 ; GFX7-NEXT: v_div_fmas_f32 v0, 1.0, v0, v1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x4c -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x70 -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x94 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x4c +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x70 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x94 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: s_and_b32 s0, 1, s4 +; GFX8-NEXT: s_and_b32 s0, 1, s2 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_nop 2 ; GFX8-NEXT: v_div_fmas_f32 v2, 1.0, v0, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -450,14 +445,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x4c -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: 
s_load_dword s2, s[4:5], 0x94 +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x4c +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] @@ -466,14 +461,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x4c -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x4c +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] @@ -482,37 +477,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x3 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x4c -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 
0x94 +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x4c +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x3 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x4c -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x4c +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) store float %result, ptr addrspace(1) %out, align 4 @@ -522,33 +513,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) { ; 
GFX7-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0x2 -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x4 -; GFX7-NEXT: s_load_dword s6, s[2:3], 0xd -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x4 +; GFX7-NEXT: s_load_dword s6, s[4:5], 0xd +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: s_and_b32 s2, 1, s6 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_nop 2 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_nop 1 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, 1.0, v1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x8 -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x10 -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x34 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x10 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: s_and_b32 s0, 1, s4 +; GFX8-NEXT: s_and_b32 s0, 1, s2 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_nop 2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, 1.0, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -560,14 +551,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x34 -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x10 -; GFX10_W32-NEXT: s_load_dword 
s6, s[2:3], 0x8 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x34 +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x10 +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x8 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] @@ -576,14 +567,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x34 -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x10 -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x8 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x34 +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x10 +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x8 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] @@ -592,37 +583,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x3 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x34 -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x10 -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x8 -; 
GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x34 +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x10 +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x8 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x3 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x34 -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x10 -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x8 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x34 +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x10 +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x8 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) store float %result, ptr addrspace(1) %out, align 4 @@ -632,33 +619,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, define amdgpu_kernel void 
@test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 -; GFX7-NEXT: s_load_dword s6, s[2:3], 0x25 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0xa +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x13 +; GFX7-NEXT: s_load_dword s6, s[4:5], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: s_and_b32 s2, 1, s6 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_nop 2 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_nop 1 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, 1.0 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x94 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x28 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x4c +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x94 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: s_and_b32 s0, 1, s4 +; GFX8-NEXT: s_and_b32 s0, 1, s2 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_nop 2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, 1.0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -670,14 +657,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX10_W32: ; %bb.0: ; 
GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x4c -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x4c +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] @@ -686,14 +673,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x4c -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x4c +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] @@ -702,37 +689,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX11_W32: ; %bb.0: ; 
GFX11_W32-NEXT: s_clause 0x3 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x4c -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x4c +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x3 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x4c -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x4c +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) store float %result, ptr 
addrspace(1) %out, align 4 @@ -742,83 +725,83 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, double %b, double %c, i1 %d) { ; GFX7-LABEL: test_div_fmas_f64: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s0, s[2:3], 0x8 +; GFX7-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s0, s[4:5], 0x8 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s6 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 -; GFX7-NEXT: v_mov_b32_e32 v4, s10 +; GFX7-NEXT: v_mov_b32_e32 v0, s10 +; GFX7-NEXT: v_mov_b32_e32 v2, s12 +; GFX7-NEXT: v_mov_b32_e32 v4, s14 ; GFX7-NEXT: s_and_b32 s0, 1, s0 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: v_mov_b32_e32 v3, s9 -; GFX7-NEXT: v_mov_b32_e32 v5, s11 +; GFX7-NEXT: v_mov_b32_e32 v1, s11 +; GFX7-NEXT: v_mov_b32_e32 v3, s13 +; GFX7-NEXT: v_mov_b32_e32 v5, s15 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX7-NEXT: s_mov_b32 s6, -1 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 ; GFX7-NEXT: s_nop 1 ; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] -; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f64: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x20 +; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x20 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, s6 -; GFX8-NEXT: v_mov_b32_e32 v2, s8 -; GFX8-NEXT: v_mov_b32_e32 v4, s10 +; GFX8-NEXT: v_mov_b32_e32 v0, s10 +; GFX8-NEXT: v_mov_b32_e32 v2, s12 +; GFX8-NEXT: v_mov_b32_e32 v4, s14 ; GFX8-NEXT: s_and_b32 s0, 1, s0 -; GFX8-NEXT: v_mov_b32_e32 v1, s7 -; GFX8-NEXT: v_mov_b32_e32 v3, s9 -; GFX8-NEXT: v_mov_b32_e32 v5, s11 +; 
GFX8-NEXT: v_mov_b32_e32 v1, s11 +; GFX8-NEXT: v_mov_b32_e32 v3, s13 +; GFX8-NEXT: v_mov_b32_e32 v5, s15 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 ; GFX8-NEXT: s_nop 3 ; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_mov_b32_e32 v3, s5 +; GFX8-NEXT: v_mov_b32_e32 v2, s8 +; GFX8-NEXT: v_mov_b32_e32 v3, s9 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm ; ; GFX10_W32-LABEL: test_div_fmas_f64: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x1 -; GFX10_W32-NEXT: s_load_dword s0, s[2:3], 0x20 -; GFX10_W32-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s0, s[4:5], 0x20 +; GFX10_W32-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s0 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s8 -; GFX10_W32-NEXT: v_mov_b32_e32 v2, s10 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s12 +; GFX10_W32-NEXT: v_mov_b32_e32 v2, s14 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 -; GFX10_W32-NEXT: v_mov_b32_e32 v1, s9 -; GFX10_W32-NEXT: v_mov_b32_e32 v3, s11 -; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3] +; GFX10_W32-NEXT: v_mov_b32_e32 v1, s13 +; GFX10_W32-NEXT: v_mov_b32_e32 v3, s15 +; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], s[10:11], v[0:1], v[2:3] ; GFX10_W32-NEXT: v_mov_b32_e32 v2, 0 -; GFX10_W32-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] +; GFX10_W32-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX10_W32-NEXT: s_endpgm ; ; GFX10_W64-LABEL: test_div_fmas_f64: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x1 -; GFX10_W64-NEXT: s_load_dword s0, s[2:3], 0x20 -; GFX10_W64-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s0, s[4:5], 0x20 +; GFX10_W64-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s0 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s8 -; GFX10_W64-NEXT: v_mov_b32_e32 v2, s10 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, 
s12 +; GFX10_W64-NEXT: v_mov_b32_e32 v2, s14 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX10_W64-NEXT: v_mov_b32_e32 v1, s9 -; GFX10_W64-NEXT: v_mov_b32_e32 v3, s11 -; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3] +; GFX10_W64-NEXT: v_mov_b32_e32 v1, s13 +; GFX10_W64-NEXT: v_mov_b32_e32 v3, s15 +; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], s[10:11], v[0:1], v[2:3] ; GFX10_W64-NEXT: v_mov_b32_e32 v2, 0 -; GFX10_W64-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] +; GFX10_W64-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX10_W64-NEXT: s_endpgm ; ; GFX11_W32-LABEL: test_div_fmas_f64: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x1 -; GFX11_W32-NEXT: s_load_b32 s8, s[2:3], 0x20 -; GFX11_W32-NEXT: s_load_b256 s[0:7], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s8, s[4:5], 0x20 +; GFX11_W32-NEXT: s_load_b256 s[0:7], s[4:5], 0x0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W32-NEXT: s_and_b32 s8, 1, s8 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 @@ -827,15 +810,13 @@ define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, d ; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3] ; GFX11_W32-NEXT: v_mov_b32_e32 v2, 0 ; GFX11_W32-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f64: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x1 -; GFX11_W64-NEXT: s_load_b32 s8, s[2:3], 0x20 -; GFX11_W64-NEXT: s_load_b256 s[0:7], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s8, s[4:5], 0x20 +; GFX11_W64-NEXT: s_load_b256 s[0:7], s[4:5], 0x0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W64-NEXT: s_and_b32 s8, 1, s8 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4 @@ -846,8 +827,6 @@ define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, d ; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3] ; GFX11_W64-NEXT: v_mov_b32_e32 v2, 0 ; GFX11_W64-NEXT: 
global_store_b64 v2, v[0:1], s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) store double %result, ptr addrspace(1) %out, align 8 @@ -857,35 +836,35 @@ define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, d define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %i) { ; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_cmp_eq_u32 s7, 0 -; GFX7-NEXT: s_cselect_b32 s2, 1, 0 -; GFX7-NEXT: s_and_b32 s2, 1, s2 -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mov_b32_e32 v2, s6 -; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_nop 2 +; GFX7-NEXT: s_cmp_eq_u32 s3, 0 +; GFX7-NEXT: s_cselect_b32 s3, 1, 0 +; GFX7-NEXT: v_mov_b32_e32 v0, s0 +; GFX7-NEXT: s_and_b32 s0, 1, s3 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 +; GFX7-NEXT: s_nop 3 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 -; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x8 +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x8 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_cmp_eq_u32 s7, 0 -; GFX8-NEXT: s_cselect_b32 s0, 1, 0 -; GFX8-NEXT: s_and_b32 s0, 1, s0 -; GFX8-NEXT: v_mov_b32_e32 v0, s4 -; GFX8-NEXT: 
v_mov_b32_e32 v1, s5 -; GFX8-NEXT: v_mov_b32_e32 v2, s6 +; GFX8-NEXT: s_cmp_eq_u32 s3, 0 +; GFX8-NEXT: s_cselect_b32 s3, 1, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: s_and_b32 s0, 1, s3 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_nop 2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -897,72 +876,68 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, ; GFX10_W32-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x1 -; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x8 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x8 +; GFX10_W32-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_cmp_eq_u32 s7, 0 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 -; GFX10_W32-NEXT: s_cselect_b32 s2, 1, 0 -; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6 -; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 -; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 -; GFX10_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1 +; GFX10_W32-NEXT: s_cmp_eq_u32 s3, 0 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s1 +; GFX10_W32-NEXT: s_cselect_b32 s3, 1, 0 +; GFX10_W32-NEXT: v_mov_b32_e32 v1, s2 +; GFX10_W32-NEXT: s_and_b32 s3, 1, s3 +; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3 +; GFX10_W32-NEXT: v_div_fmas_f32 v0, s0, v0, v1 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 -; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10_W32-NEXT: global_store_dword v1, v0, s[6:7] ; GFX10_W32-NEXT: s_endpgm ; ; GFX10_W64-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x1 -; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x8 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x8 +; 
GFX10_W64-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: s_cmp_eq_u32 s7, 0 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 -; GFX10_W64-NEXT: s_cselect_b32 s2, 1, 0 -; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6 -; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 -; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX10_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1 +; GFX10_W64-NEXT: s_cmp_eq_u32 s3, 0 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s1 +; GFX10_W64-NEXT: s_cselect_b32 s3, 1, 0 +; GFX10_W64-NEXT: v_mov_b32_e32 v1, s2 +; GFX10_W64-NEXT: s_and_b32 s3, 1, s3 +; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3 +; GFX10_W64-NEXT: v_div_fmas_f32 v0, s0, v0, v1 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 -; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10_W64-NEXT: global_store_dword v1, v0, s[6:7] ; GFX10_W64-NEXT: s_endpgm ; ; GFX11_W32-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x1 -; GFX11_W32-NEXT: s_load_b128 s[4:7], s[2:3], 0x8 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b128 s[0:3], s[4:5], 0x8 +; GFX11_W32-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_cmp_eq_u32 s7, 0 -; GFX11_W32-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11_W32-NEXT: s_cselect_b32 s2, 1, 0 -; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 -; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 -; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1 +; GFX11_W32-NEXT: s_cmp_eq_u32 s3, 0 +; GFX11_W32-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2 +; GFX11_W32-NEXT: s_cselect_b32 s3, 1, 0 +; GFX11_W32-NEXT: s_and_b32 s3, 1, s3 +; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3 +; GFX11_W32-NEXT: v_div_fmas_f32 v0, s0, v0, v1 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 -; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11_W32-NEXT: global_store_b32 v1, v0, s[4:5] ; 
GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x1 -; GFX11_W64-NEXT: s_load_b128 s[4:7], s[2:3], 0x8 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b128 s[0:3], s[4:5], 0x8 +; GFX11_W64-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: s_cmp_eq_u32 s7, 0 -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 -; GFX11_W64-NEXT: s_cselect_b32 s2, 1, 0 -; GFX11_W64-NEXT: v_mov_b32_e32 v1, s6 -; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 -; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1 +; GFX11_W64-NEXT: s_cmp_eq_u32 s3, 0 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s1 +; GFX11_W64-NEXT: s_cselect_b32 s3, 1, 0 +; GFX11_W64-NEXT: v_mov_b32_e32 v1, s2 +; GFX11_W64-NEXT: s_and_b32 s3, 1, s3 +; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3 +; GFX11_W64-NEXT: v_div_fmas_f32 v0, s0, v0, v1 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 -; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11_W64-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11_W64-NEXT: s_endpgm %cmp = icmp eq i32 %i, 0 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) @@ -973,14 +948,14 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 -; GFX7-NEXT: s_load_dword s6, s[2:3], 0x1c -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0xa +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x13 +; GFX7-NEXT: s_load_dword s6, s[4:5], 0x1c +; GFX7-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x0 ; GFX7-NEXT: s_mov_b64 vcc, 0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: v_mov_b32_e32 v2, s6 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -990,16 +965,16 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace ; ; GFX8-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x70 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x28 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x4c +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x70 ; GFX8-NEXT: s_mov_b64 vcc, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 @@ -1009,14 +984,14 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace ; GFX10_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x4c -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x4c +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v1, s5 +; 
GFX10_W32-NEXT: v_mov_b32_e32 v0, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v1, s3 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] @@ -1025,14 +1000,14 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace ; GFX10_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x4c -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x4c +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: s_mov_b64 vcc, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v1, s5 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v1, s3 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] @@ -1041,36 +1016,32 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace ; GFX11_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x3 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x4c -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: 
v_dual_mov_b32 v1, s3 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x3 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x4c -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: s_mov_b64 vcc, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v1, s5 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false) store float %result, ptr addrspace(1) %out, align 4 @@ -1080,14 +1051,14 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 -; GFX7-NEXT: s_load_dword s6, s[2:3], 0x1c -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0xa +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x13 +; GFX7-NEXT: 
s_load_dword s6, s[4:5], 0x1c +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_mov_b64 vcc, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: v_mov_b32_e32 v2, s6 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -1097,16 +1068,16 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace( ; ; GFX8-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x70 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x28 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x4c +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x70 ; GFX8-NEXT: s_mov_b64 vcc, -1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 @@ -1116,14 +1087,14 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace( ; GFX10_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x4c -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x4c +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, -1 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: 
v_mov_b32_e32 v0, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v1, s5 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v1, s3 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] @@ -1132,14 +1103,14 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace( ; GFX10_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x4c -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x4c +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: s_mov_b64 vcc, -1 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v1, s5 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v1, s3 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] @@ -1148,36 +1119,32 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace( ; GFX11_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x3 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x4c -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: s_mov_b32 vcc_lo, -1 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: 
v_dual_mov_b32 v1, s5 +; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x3 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x4c -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: s_mov_b64 vcc, -1 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v1, s5 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true) store float %result, ptr addrspace(1) %out, align 4 @@ -1187,40 +1154,40 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace( define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %d) { ; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s0, s[2:3], 0xc -; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s4, s[4:5], 0xc +; GFX7-NEXT: s_mov_b32 s2, 
0 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_mov_b64 s[4:5], s[10:11] -; GFX7-NEXT: buffer_load_dword v3, v[1:2], s[4:7], 0 addr64 glc +; GFX7-NEXT: s_mov_b64 s[0:1], s[10:11] +; GFX7-NEXT: buffer_load_dword v3, v[1:2], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_load_dword v4, v[1:2], s[4:7], 0 addr64 offset:4 glc +; GFX7-NEXT: buffer_load_dword v4, v[1:2], s[0:3], 0 addr64 offset:4 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_load_dword v1, v[1:2], s[4:7], 0 addr64 offset:8 glc +; GFX7-NEXT: buffer_load_dword v1, v[1:2], s[0:3], 0 addr64 offset:8 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: s_cmp_lg_u32 s0, 0 +; GFX7-NEXT: s_cmp_lg_u32 s4, 0 ; GFX7-NEXT: s_cselect_b32 s0, 1, 0 ; GFX7-NEXT: s_and_b32 s0, 1, s0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX7-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_and_b64 vcc, vcc, s[0:1] -; GFX7-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX7-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX7-NEXT: v_div_fmas_f32 v0, v3, v4, v1 ; GFX7-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:8 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 -; GFX8-NEXT: s_load_dword s2, s[2:3], 0x30 +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s4, s[4:5], 0x30 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v1, s6 -; GFX8-NEXT: v_mov_b32_e32 v2, s7 +; GFX8-NEXT: v_mov_b32_e32 v1, s2 +; GFX8-NEXT: v_mov_b32_e32 v2, s3 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 4, v1 @@ -1233,9 +1200,9 @@ define amdgpu_kernel void 
@test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: flat_load_dword v3, v[5:6] glc ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: s_add_u32 s0, s4, 8 -; GFX8-NEXT: s_addc_u32 s1, s5, 0 -; GFX8-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8-NEXT: s_add_u32 s0, s0, 8 +; GFX8-NEXT: s_addc_u32 s1, s1, 0 +; GFX8-NEXT: s_cmp_lg_u32 s4, 0 ; GFX8-NEXT: s_cselect_b32 s2, 1, 0 ; GFX8-NEXT: s_and_b32 s2, 1, s2 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 @@ -1250,104 +1217,96 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1 ; ; GFX10_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX10_W32: ; %bb.0: -; GFX10_W32-NEXT: s_clause 0x1 -; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 -; GFX10_W32-NEXT: s_load_dword s0, s[2:3], 0x30 +; GFX10_W32-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX10_W32-NEXT: s_load_dword s4, s[4:5], 0x30 ; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: global_load_dword v2, v1, s[6:7] glc dlc +; GFX10_W32-NEXT: global_load_dword v2, v1, s[2:3] glc dlc ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-NEXT: global_load_dword v3, v1, s[6:7] offset:4 glc dlc +; GFX10_W32-NEXT: global_load_dword v3, v1, s[2:3] offset:4 glc dlc ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-NEXT: global_load_dword v4, v1, s[6:7] offset:8 glc dlc +; GFX10_W32-NEXT: global_load_dword v4, v1, s[2:3] offset:8 glc dlc ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-NEXT: s_cmp_lg_u32 s0, 0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 -; GFX10_W32-NEXT: s_cselect_b32 s0, 1, 0 -; GFX10_W32-NEXT: s_and_b32 s0, 1, s0 -; GFX10_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 -; GFX10_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; GFX10_W32-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10_W32-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W32-NEXT: v_cmp_ne_u32_e64 s2, 0, s2 +; GFX10_W32-NEXT: s_and_b32 vcc_lo, 
vcc_lo, s2 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v4 -; GFX10_W32-NEXT: global_store_dword v1, v0, s[4:5] offset:8 +; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] offset:8 ; GFX10_W32-NEXT: s_endpgm ; ; GFX10_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX10_W64: ; %bb.0: -; GFX10_W64-NEXT: s_clause 0x1 -; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 -; GFX10_W64-NEXT: s_load_dword s0, s[2:3], 0x30 +; GFX10_W64-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX10_W64-NEXT: s_load_dword s4, s[4:5], 0x30 ; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: global_load_dword v2, v1, s[6:7] glc dlc +; GFX10_W64-NEXT: global_load_dword v2, v1, s[2:3] glc dlc ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-NEXT: global_load_dword v3, v1, s[6:7] offset:4 glc dlc +; GFX10_W64-NEXT: global_load_dword v3, v1, s[2:3] offset:4 glc dlc ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-NEXT: global_load_dword v4, v1, s[6:7] offset:8 glc dlc +; GFX10_W64-NEXT: global_load_dword v4, v1, s[2:3] offset:8 glc dlc ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-NEXT: s_cmp_lg_u32 s0, 0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 -; GFX10_W64-NEXT: s_cselect_b32 s0, 1, 0 -; GFX10_W64-NEXT: s_and_b32 s0, 1, s0 -; GFX10_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; GFX10_W64-NEXT: s_and_b64 vcc, vcc, s[0:1] +; GFX10_W64-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10_W64-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W64-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2 +; GFX10_W64-NEXT: s_and_b64 vcc, vcc, s[2:3] ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v4 -; GFX10_W64-NEXT: global_store_dword v1, v0, s[4:5] offset:8 +; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] offset:8 ; GFX10_W64-NEXT: s_endpgm ; ; GFX11_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX11_W32: ; %bb.0: -; GFX11_W32-NEXT: s_clause 0x1 -; GFX11_W32-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 
-; GFX11_W32-NEXT: s_load_b32 s0, s[2:3], 0x30 +; GFX11_W32-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11_W32-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11_W32-NEXT: s_load_b32 s4, s[4:5], 0x30 ; GFX11_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX11_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: global_load_b32 v2, v1, s[6:7] glc dlc +; GFX11_W32-NEXT: global_load_b32 v2, v1, s[2:3] glc dlc ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) -; GFX11_W32-NEXT: global_load_b32 v3, v1, s[6:7] offset:4 glc dlc +; GFX11_W32-NEXT: global_load_b32 v3, v1, s[2:3] offset:4 glc dlc ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) -; GFX11_W32-NEXT: global_load_b32 v1, v1, s[6:7] offset:8 glc dlc +; GFX11_W32-NEXT: global_load_b32 v1, v1, s[2:3] offset:8 glc dlc ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) -; GFX11_W32-NEXT: s_cmp_lg_u32 s0, 0 -; GFX11_W32-NEXT: s_cselect_b32 s0, 1, 0 -; GFX11_W32-NEXT: s_and_b32 s0, 1, s0 -; GFX11_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 -; GFX11_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; GFX11_W32-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11_W32-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W32-NEXT: v_cmp_ne_u32_e64 s2, 0, s2 +; GFX11_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s2 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v1 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 -; GFX11_W32-NEXT: global_store_b32 v1, v0, s[4:5] offset:8 -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX11_W64: ; %bb.0: -; GFX11_W64-NEXT: s_clause 0x1 -; GFX11_W64-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 -; GFX11_W64-NEXT: s_load_b32 s0, s[2:3], 0x30 +; GFX11_W64-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11_W64-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11_W64-NEXT: s_load_b32 s4, s[4:5], 0x30 ; GFX11_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX11_W64-NEXT: v_cmp_eq_u32_e32 
vcc, 0, v0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: global_load_b32 v2, v1, s[6:7] glc dlc +; GFX11_W64-NEXT: global_load_b32 v2, v1, s[2:3] glc dlc ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) -; GFX11_W64-NEXT: global_load_b32 v3, v1, s[6:7] offset:4 glc dlc +; GFX11_W64-NEXT: global_load_b32 v3, v1, s[2:3] offset:4 glc dlc ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) -; GFX11_W64-NEXT: global_load_b32 v1, v1, s[6:7] offset:8 glc dlc +; GFX11_W64-NEXT: global_load_b32 v1, v1, s[2:3] offset:8 glc dlc ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) -; GFX11_W64-NEXT: s_cmp_lg_u32 s0, 0 -; GFX11_W64-NEXT: s_cselect_b32 s0, 1, 0 -; GFX11_W64-NEXT: s_and_b32 s0, 1, s0 -; GFX11_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; GFX11_W64-NEXT: s_and_b64 vcc, vcc, s[0:1] +; GFX11_W64-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11_W64-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W64-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2 +; GFX11_W64-NEXT: s_and_b64 vcc, vcc, s[2:3] ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v1 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 -; GFX11_W64-NEXT: global_store_b32 v1, v0, s[4:5] offset:8 -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 ; GFX11_W64-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid @@ -1371,19 +1330,19 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1 define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [8 x i32], ptr addrspace(1) %in, [8 x i32], ptr addrspace(1) %dummy) { ; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, 0 +; 
GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64 +; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[0:3], 0 addr64 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 ; GFX7-NEXT: s_mov_b64 vcc, 0 -; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] +; GFX7-NEXT: s_and_saveexec_b64 s[6:7], s[0:1] ; GFX7-NEXT: s_cbranch_execz .LBB13_2 ; GFX7-NEXT: ; %bb.1: ; %bb -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x14 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x14 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -1395,18 +1354,18 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX7-NEXT: s_and_b64 s[0:1], exec, s[0:1] ; GFX7-NEXT: s_or_b64 vcc, s[8:9], s[0:1] ; GFX7-NEXT: .LBB13_2: ; %exit -; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_div_fmas_f32 v0, v1, v2, v3 -; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x28 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, s1 @@ -1416,10 +1375,10 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX8-NEXT: flat_load_dwordx3 v[1:3], v[1:2] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 ; GFX8-NEXT: s_mov_b64 vcc, 0 -; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] +; GFX8-NEXT: s_and_saveexec_b64 s[2:3], s[0:1] ; GFX8-NEXT: s_cbranch_execz 
.LBB13_2 ; GFX8-NEXT: ; %bb.1: ; %bb -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x50 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x50 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -1431,10 +1390,10 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX8-NEXT: s_and_b64 s[0:1], exec, s[0:1] ; GFX8-NEXT: s_or_b64 vcc, s[6:7], s[0:1] ; GFX8-NEXT: .LBB13_2: ; %exit -; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_add_u32 s0, s0, 8 ; GFX8-NEXT: s_addc_u32 s1, s1, 0 @@ -1445,7 +1404,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; ; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX10_W32: ; %bb.0: ; %entry -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) @@ -1454,20 +1413,20 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX10_W32-NEXT: s_and_saveexec_b32 s1, s0 ; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2 ; GFX10_W32-NEXT: ; %bb.1: ; %bb -; GFX10_W32-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x50 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x50 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX10_W32-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_cmp_lg_u32 s0, 0 ; GFX10_W32-NEXT: s_cselect_b32 s0, 1, 0 -; GFX10_W32-NEXT: s_andn2_b32 s4, 0, exec_lo +; GFX10_W32-NEXT: s_andn2_b32 s2, 0, exec_lo ; GFX10_W32-NEXT: s_and_b32 s0, 1, s0 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 
s0, 0, s0 ; GFX10_W32-NEXT: s_and_b32 s0, exec_lo, s0 -; GFX10_W32-NEXT: s_or_b32 vcc_lo, s4, s0 +; GFX10_W32-NEXT: s_or_b32 vcc_lo, s2, s0 ; GFX10_W32-NEXT: .LBB13_2: ; %exit ; GFX10_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v1, v2, v3 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 @@ -1477,16 +1436,16 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; ; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX10_W64: ; %bb.0: ; %entry -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX10_W64-NEXT: s_mov_b64 vcc, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: global_load_dwordx3 v[1:3], v1, s[0:1] ; GFX10_W64-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 -; GFX10_W64-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] +; GFX10_W64-NEXT: s_and_saveexec_b64 s[2:3], s[0:1] ; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2 ; GFX10_W64-NEXT: ; %bb.1: ; %bb -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x50 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x50 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) @@ -1498,8 +1457,8 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX10_W64-NEXT: s_and_b64 s[0:1], exec, s[0:1] ; GFX10_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1] ; GFX10_W64-NEXT: .LBB13_2: ; %exit -; GFX10_W64-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v1, v2, v3 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 @@ -1509,7 +1468,7 @@ define amdgpu_kernel 
void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; ; GFX11_W32-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX11_W32: ; %bb.0: ; %entry -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x28 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x28 ; GFX11_W32-NEXT: v_and_b32_e32 v3, 0x3ff, v0 ; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0 ; GFX11_W32-NEXT: v_lshlrev_b32_e32 v0, 2, v3 @@ -1519,42 +1478,40 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v3 ; GFX11_W32-NEXT: s_cbranch_execz .LBB13_2 ; GFX11_W32-NEXT: ; %bb.1: ; %bb -; GFX11_W32-NEXT: s_load_b64 s[4:5], s[2:3], 0x50 +; GFX11_W32-NEXT: s_load_b64 s[2:3], s[4:5], 0x50 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_load_b32 s0, s[4:5], 0x0 +; GFX11_W32-NEXT: s_load_b32 s0, s[2:3], 0x0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W32-NEXT: s_cmp_lg_u32 s0, 0 ; GFX11_W32-NEXT: s_cselect_b32 s0, 1, 0 -; GFX11_W32-NEXT: s_and_not1_b32 s4, 0, exec_lo +; GFX11_W32-NEXT: s_and_not1_b32 s2, 0, exec_lo ; GFX11_W32-NEXT: s_and_b32 s0, 1, s0 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GFX11_W32-NEXT: s_and_b32 s0, exec_lo, s0 -; GFX11_W32-NEXT: s_or_b32 vcc_lo, s4, s0 +; GFX11_W32-NEXT: s_or_b32 vcc_lo, s2, s0 ; GFX11_W32-NEXT: .LBB13_2: ; %exit ; GFX11_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX11_W64: ; %bb.0: ; %entry -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x28 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x28 ; GFX11_W64-NEXT: v_and_b32_e32 v3, 0x3ff, 
v0 ; GFX11_W64-NEXT: s_mov_b64 vcc, 0 -; GFX11_W64-NEXT: s_mov_b64 s[4:5], exec +; GFX11_W64-NEXT: s_mov_b64 s[2:3], exec ; GFX11_W64-NEXT: v_lshlrev_b32_e32 v0, 2, v3 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W64-NEXT: global_load_b96 v[0:2], v0, s[0:1] ; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v3 ; GFX11_W64-NEXT: s_cbranch_execz .LBB13_2 ; GFX11_W64-NEXT: ; %bb.1: ; %bb -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x50 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x50 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W64-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) @@ -1566,15 +1523,13 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX11_W64-NEXT: s_and_b64 s[0:1], exec, s[0:1] ; GFX11_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1] ; GFX11_W64-NEXT: .LBB13_2: ; %exit -; GFX11_W64-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll index 50b6ad9f0cb37..15f5b2f76dfc5 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll @@ -7646,9 +7646,9 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX7-NEXT: s_or_b64 s[8:9], s[0:1], s[8:9] ; GFX7-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX7-NEXT: s_cbranch_execnz .LBB28_2 -; GFX7-NEXT: ; %bb.3: ; %Flow23 +; GFX7-NEXT: ; %bb.3: ; %Flow22 ; GFX7-NEXT: 
s_or_b64 exec, exec, s[8:9] -; GFX7-NEXT: .LBB28_4: ; %Flow24 +; GFX7-NEXT: .LBB28_4: ; %Flow23 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: s_mov_b64 s[8:9], exec ; GFX7-NEXT: v_readfirstlane_b32 s10, v1 @@ -7676,7 +7676,7 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX7-NEXT: v_mov_b32_e32 v3, v4 ; GFX7-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX7-NEXT: s_cbranch_execnz .LBB28_6 -; GFX7-NEXT: .LBB28_7: ; %Flow22 +; GFX7-NEXT: .LBB28_7: ; %Flow21 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX7-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 @@ -7725,7 +7725,7 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX7-NEXT: s_cbranch_execnz .LBB28_11 ; GFX7-NEXT: ; %bb.12: ; %Flow ; GFX7-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX7-NEXT: .LBB28_13: ; %Flow20 +; GFX7-NEXT: .LBB28_13: ; %Flow19 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: v_readfirstlane_b32 s4, v2 @@ -7770,9 +7770,9 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX6-NEXT: s_or_b64 s[8:9], s[0:1], s[8:9] ; GFX6-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX6-NEXT: s_cbranch_execnz .LBB28_2 -; GFX6-NEXT: ; %bb.3: ; %Flow21 +; GFX6-NEXT: ; %bb.3: ; %Flow20 ; GFX6-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX6-NEXT: .LBB28_4: ; %Flow22 +; GFX6-NEXT: .LBB28_4: ; %Flow21 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: s_mov_b64 s[8:9], exec ; GFX6-NEXT: v_readfirstlane_b32 s10, v1 @@ -7800,7 +7800,7 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX6-NEXT: v_mov_b32_e32 v3, v4 ; GFX6-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX6-NEXT: s_cbranch_execnz .LBB28_6 -; GFX6-NEXT: .LBB28_7: ; %Flow20 +; GFX6-NEXT: .LBB28_7: ; %Flow19 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 @@ -7849,7 
+7849,7 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX6-NEXT: s_cbranch_execnz .LBB28_11 ; GFX6-NEXT: ; %bb.12: ; %Flow ; GFX6-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX6-NEXT: .LBB28_13: ; %Flow18 +; GFX6-NEXT: .LBB28_13: ; %Flow17 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX6-NEXT: v_readfirstlane_b32 s4, v2 @@ -8483,9 +8483,9 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX7-NEXT: s_or_b64 s[8:9], s[0:1], s[8:9] ; GFX7-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX7-NEXT: s_cbranch_execnz .LBB29_2 -; GFX7-NEXT: ; %bb.3: ; %Flow23 +; GFX7-NEXT: ; %bb.3: ; %Flow22 ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX7-NEXT: .LBB29_4: ; %Flow24 +; GFX7-NEXT: .LBB29_4: ; %Flow23 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: s_mov_b64 s[8:9], exec ; GFX7-NEXT: v_readfirstlane_b32 s10, v1 @@ -8513,7 +8513,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX7-NEXT: v_mov_b32_e32 v3, v4 ; GFX7-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX7-NEXT: s_cbranch_execnz .LBB29_6 -; GFX7-NEXT: .LBB29_7: ; %Flow22 +; GFX7-NEXT: .LBB29_7: ; %Flow21 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX7-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 @@ -8562,7 +8562,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX7-NEXT: s_cbranch_execnz .LBB29_11 ; GFX7-NEXT: ; %bb.12: ; %Flow ; GFX7-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX7-NEXT: .LBB29_13: ; %Flow20 +; GFX7-NEXT: .LBB29_13: ; %Flow19 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: v_readfirstlane_b32 s4, v2 @@ -8607,9 +8607,9 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX6-NEXT: s_or_b64 s[8:9], s[0:1], s[8:9] ; GFX6-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX6-NEXT: s_cbranch_execnz .LBB29_2 -; 
GFX6-NEXT: ; %bb.3: ; %Flow21 +; GFX6-NEXT: ; %bb.3: ; %Flow20 ; GFX6-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX6-NEXT: .LBB29_4: ; %Flow22 +; GFX6-NEXT: .LBB29_4: ; %Flow21 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: s_mov_b64 s[8:9], exec ; GFX6-NEXT: v_readfirstlane_b32 s10, v1 @@ -8637,7 +8637,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX6-NEXT: v_mov_b32_e32 v3, v4 ; GFX6-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX6-NEXT: s_cbranch_execnz .LBB29_6 -; GFX6-NEXT: .LBB29_7: ; %Flow20 +; GFX6-NEXT: .LBB29_7: ; %Flow19 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 @@ -8686,7 +8686,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX6-NEXT: s_cbranch_execnz .LBB29_11 ; GFX6-NEXT: ; %bb.12: ; %Flow ; GFX6-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX6-NEXT: .LBB29_13: ; %Flow18 +; GFX6-NEXT: .LBB29_13: ; %Flow17 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX6-NEXT: v_readfirstlane_b32 s4, v2 diff --git a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll index a9b8663a48dea..dad59daaefb5f 100644 --- a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll +++ b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll @@ -39,6 +39,7 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX942-NEXT: s_cbranch_vccz .LBB0_1 ; GFX942-NEXT: ; %bb.3: ; GFX942-NEXT: ; implicit-def: $sgpr3 +; GFX942-NEXT: ; implicit-def: $agpr0 ; GFX942-NEXT: .LBB0_4: ; %common.ret ; GFX942-NEXT: s_endpgm ; @@ -79,6 +80,7 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX908-NEXT: s_cbranch_vccz .LBB0_1 ; GFX908-NEXT: ; %bb.3: ; GFX908-NEXT: ; implicit-def: $sgpr3 +; GFX908-NEXT: ; implicit-def: $agpr0 ; GFX908-NEXT: .LBB0_4: ; %common.ret ; GFX908-NEXT: s_endpgm entry: diff --git 
a/llvm/test/CodeGen/AMDGPU/true16-saveexec.mir b/llvm/test/CodeGen/AMDGPU/true16-saveexec.mir new file mode 100644 index 0000000000000..c178083eecbce --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/true16-saveexec.mir @@ -0,0 +1,64 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-optimize-exec-masking -o - %s | FileCheck %s + +--- +name: int +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: int + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr20 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; CHECK-NEXT: V_CMPX_LT_I16_t16_nosdst_e64 0, 15, 0, $vgpr20_lo16, 0, implicit-def $exec, implicit $exec + ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_XOR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: S_ENDPGM 0 + bb.1: + liveins: $vgpr20 + $vcc = V_CMP_LT_I16_t16_e64 0, 15, 0, $vgpr20_lo16, 0, implicit $exec + renamable $sgpr0_sgpr1 = COPY $exec, implicit-def $exec + renamable $sgpr2_sgpr3 = S_AND_B64 renamable $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc + renamable $sgpr0_sgpr1 = S_XOR_B64 renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3 + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... 
+ +--- +name: float +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: float + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr20 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; CHECK-NEXT: V_CMPX_LT_F16_t16_nosdst_e64 0, 15, 0, $vgpr20_lo16, 1, 0, implicit-def $exec, implicit $mode, implicit $exec + ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_XOR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: S_ENDPGM 0 + bb.1: + liveins: $vgpr20 + $vcc = V_CMP_LT_F16_t16_e64 0, 15, 0, $vgpr20_lo16, 1, 0, implicit $exec, implicit $mode + renamable $sgpr0_sgpr1 = COPY $exec, implicit-def $exec + renamable $sgpr2_sgpr3 = S_AND_B64 renamable $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc + renamable $sgpr0_sgpr1 = S_XOR_B64 renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3 + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir index 62ede3b9eef3b..400bff47c8f2e 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir @@ -16,7 +16,7 @@ registers: - { id: 1, class: float32regs } body: | bb.0.entry: - %0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0 + %0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0 ; CHECK: [[@LINE+1]]:33: expected a floating point literal %1 = FADD_rnf32ri %0, float 3 StoreRetvalF32 %1, 0 diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir index 69c1e25a06024..486c6ca16a531 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir @@ -40,9 +40,9 @@ registers: - { id: 7, class: float32regs } body: | bb.0.entry: - %0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test_param_0, 0 + %0 = LD_f32 0, 0, 4, 1, 2, 32, &test_param_0, 0 %1 = CVT_f64_f32 %0, 0 - %2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test_param_1, 0 + %2 = LD_i32 0, 0, 4, 1, 0, 32, &test_param_1, 0 ; CHECK: %3:float64regs = FADD_rnf64ri %1, double 3.250000e+00 %3 = FADD_rnf64ri %1, double 3.250000e+00 %4 = CVT_f32_f64 %3, 5 @@ -66,9 +66,9 @@ registers: - { id: 7, class: float32regs } body: | bb.0.entry: - %0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test2_param_0, 0 + %0 = LD_f32 0, 0, 4, 1, 2, 32, &test2_param_0, 0 %1 = CVT_f64_f32 %0, 0 - %2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test2_param_1, 0 + %2 = LD_i32 0, 0, 4, 1, 0, 32, &test2_param_1, 0 ; CHECK: %3:float64regs = FADD_rnf64ri %1, double 0x7FF8000000000000 %3 = FADD_rnf64ri %1, double 0x7FF8000000000000 %4 = CVT_f32_f64 %3, 5 diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir index 
cc9a36509db33..114b0f9702033 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir @@ -16,7 +16,7 @@ registers: - { id: 1, class: float32regs } body: | bb.0.entry: - %0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0 + %0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0 ; CHECK: [[@LINE+1]]:33: floating point constant does not have type 'float' %1 = FADD_rnf32ri %0, float 0xH3C00 StoreRetvalF32 %1, 0 diff --git a/llvm/test/CodeGen/PowerPC/v1024ls.ll b/llvm/test/CodeGen/PowerPC/v1024ls.ll new file mode 100644 index 0000000000000..c7f6911f9ddbc --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/v1024ls.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE + +define void @v1024ls(ptr nocapture readonly %vqp, ptr nocapture %resp) { +; CHECK-LABEL: v1024ls: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r4) +; CHECK-NEXT: stxvp vsp36, 64(r4) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r4) +; CHECK-NEXT: stxvp vsp36, 0(r4) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: v1024ls: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, 
vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r4) +; CHECK-BE-NEXT: stxvp vsp34, 64(r4) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r4) +; CHECK-BE-NEXT: stxvp vsp34, 0(r4) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vqp, align 64 + store <1024 x i1> %0, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz() diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll new file mode 100644 index 0000000000000..1c42cd29deca9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define bfloat @vreduce_fmin_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmin_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fmin.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmax_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmax_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; 
CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fmax.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmin_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmin_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fmin.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmax_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmax_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fmax.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fminimum_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fminimum_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: beqz a0, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fminimum.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmaximum_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmaximum_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: beqz a0, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fmaximum.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fminimum_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fminimum_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fminimum.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmaximum_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmaximum_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fmaximum.nxv4bf16( %val) + ret bfloat %s +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll new file mode 100644 index 0000000000000..e269b13137d44 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll @@ -0,0 +1,212 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc 
-mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN + +define half @vreduce_fmin_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmin_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmin_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fmin.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmax_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmax_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmax_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fmax.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmin_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmin_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: 
vreduce_fmin_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fmin.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmax_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmax_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmax_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fmax.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fminimum_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fminimum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8 +; ZVFH-NEXT: vcpop.m a0, v9 +; ZVFH-NEXT: beqz a0, .LBB4_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB4_2: +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fminimum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10 +; ZVFHMIN-NEXT: vcpop.m a0, v8 +; ZVFHMIN-NEXT: beqz a0, .LBB4_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; 
ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB4_2: +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fminimum.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmaximum_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmaximum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8 +; ZVFH-NEXT: vcpop.m a0, v9 +; ZVFH-NEXT: beqz a0, .LBB5_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB5_2: +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmaximum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10 +; ZVFHMIN-NEXT: vcpop.m a0, v8 +; ZVFHMIN-NEXT: beqz a0, .LBB5_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB5_2: +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fmaximum.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fminimum_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fminimum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fminimum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s 
fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fminimum.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmaximum_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmaximum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmaximum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fmaximum.nxv4f16( %val) + ret half %s +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll new file mode 100644 index 0000000000000..37bd0a0496dcf --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define bfloat @vpreduce_fmin_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmin_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat 
%start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmax_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmax_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmin_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmin_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmax_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmax_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat 
@vpreduce_fminimum_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fminimum_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t +; CHECK-NEXT: feq.s a1, fa5, fa5 +; CHECK-NEXT: vcpop.m a2, v8, v0.t +; CHECK-NEXT: xori a1, a1, 1 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmaximum_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmaximum_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t +; CHECK-NEXT: feq.s a1, fa5, fa5 +; CHECK-NEXT: vcpop.m a2, v8, v0.t +; CHECK-NEXT: xori a1, a1, 1 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; 
CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fminimum_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fminimum_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmaximum_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmaximum_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll new file mode 100644 index 0000000000000..8993bf8a767d8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll @@ -0,0 +1,269 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v 
-target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN + +define half @vpreduce_fmin_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmin_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmin_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fmin.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmax_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmax_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmax_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; 
ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fmax.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmin_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmin_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmin_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vp.reduce.fmin.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmax_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmax_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmax_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + 
%s = call nnan half @llvm.vp.reduce.fmax.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fminimum_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fminimum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t +; ZVFH-NEXT: fcvt.s.h fa5, fa0 +; ZVFH-NEXT: vcpop.m a1, v9, v0.t +; ZVFH-NEXT: feq.s a2, fa5, fa5 +; ZVFH-NEXT: xori a2, a2, 1 +; ZVFH-NEXT: or a1, a1, a2 +; ZVFH-NEXT: beqz a1, .LBB4_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB4_2: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fminimum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: feq.s a1, fa5, fa5 +; ZVFHMIN-NEXT: vcpop.m a2, v8, v0.t +; ZVFHMIN-NEXT: xori a1, a1, 1 +; ZVFHMIN-NEXT: or a1, a2, a1 +; ZVFHMIN-NEXT: beqz a1, .LBB4_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB4_2: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fminimum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmaximum_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: 
vpreduce_fmaximum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t +; ZVFH-NEXT: fcvt.s.h fa5, fa0 +; ZVFH-NEXT: vcpop.m a1, v9, v0.t +; ZVFH-NEXT: feq.s a2, fa5, fa5 +; ZVFH-NEXT: xori a2, a2, 1 +; ZVFH-NEXT: or a1, a1, a2 +; ZVFH-NEXT: beqz a1, .LBB5_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB5_2: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmaximum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: feq.s a1, fa5, fa5 +; ZVFHMIN-NEXT: vcpop.m a2, v8, v0.t +; ZVFHMIN-NEXT: xori a1, a1, 1 +; ZVFHMIN-NEXT: or a1, a2, a1 +; ZVFHMIN-NEXT: beqz a1, .LBB5_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB5_2: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fminimum_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fminimum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs 
v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fminimum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vp.reduce.fminimum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmaximum_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmaximum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmaximum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} diff --git a/llvm/test/CodeGen/SystemZ/cond-move-10.mir b/llvm/test/CodeGen/SystemZ/cond-move-10.mir index 1db960829729e..7a27d8b02271f 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-10.mir +++ b/llvm/test/CodeGen/SystemZ/cond-move-10.mir @@ -5,7 +5,7 @@ # CHECK: name: fun0 # CHECK: renamable $r1l = AHIMuxK killed renamable $r1l, -1, implicit-def dead $cc # CHECK-NEXT: CHIMux renamable $r5h, 9, 
implicit-def $cc -# CHECK-NEXT: $r14h = COPY killed renamable $r1l +# CHECK-NEXT: $r14h = COPY renamable $r1l --- name: fun0 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/SystemZ/cond-move-11.mir b/llvm/test/CodeGen/SystemZ/cond-move-11.mir new file mode 100644 index 0000000000000..aea2fabf9e536 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-move-11.mir @@ -0,0 +1,43 @@ +# RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z15 -start-before=systemz-post-rewrite \ +# RUN: -stop-after=machine-cp -verify-machineinstrs 2>&1 | FileCheck %s + +# The chained SELRMux:es both has two operands with the same register but +# where one of the operands have been marked as undef (resulting from +# early-ifcvt). Check that the resulting COPY after machine-cp is from $r0l +# to $r2l. + +# CHECK: name: fun0 +# CHECK: $r2l = COPY $r0l +--- | + + @Res = global i32 0, align 4 + @Z = global i32 0, align 4 + define signext i32 @fun0() { ret i32 0 } +... +--- +name: fun0 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1(0x80000000) + + renamable $r0l = LRL @Z :: (dereferenceable load (s32) from @Z) + renamable $r1l = LHIMux 1 + + bb.1: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $r0l, $r1l + + CHIMux renamable $r1l, 0, implicit-def $cc + renamable $r2l = SELRMux undef renamable $r0l, renamable $r0l, 14, 6, implicit $cc + renamable $r2l = SELRMux undef renamable $r2l, killed renamable $r2l, 14, 6, implicit $cc + BRC 14, 8, %bb.1, implicit killed $cc + J %bb.2 + + bb.2: + liveins: $r2l + + STRL renamable $r2l, @Res :: (store (s32) into @Res) + renamable $r2d = LGFR killed renamable $r2l + Return implicit $r2d +... 
diff --git a/llvm/test/CodeGen/X86/stack-protector-phi.ll b/llvm/test/CodeGen/X86/stack-protector-phi.ll new file mode 100644 index 0000000000000..bf0442dbf47a1 --- /dev/null +++ b/llvm/test/CodeGen/X86/stack-protector-phi.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define void @test_phi_diff_size(i1 %c) sspstrong { +; CHECK-LABEL: test_phi_diff_size: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %if +; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq $0, (%rax) +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq $0, (%rax) +; CHECK-NEXT: retq +entry: + %a = alloca i64 + br i1 %c, label %if, label %join + +if: + %gep = getelementptr i8, ptr %a, i64 4 + br label %join + +join: + %phi = phi ptr [ %a, %entry ], [ %gep, %if ] + store i64 0, ptr %phi + ret void +} + +define void @test_phi_loop(i1 %c) sspstrong { +; CHECK-LABEL: test_phi_loop: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-131072, %rsp # imm = 0xFFFE0000 +; CHECK-NEXT: subq $262144, %rsp # imm = 0x40000 +; CHECK-NEXT: movq %rsp, %rax +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB1_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq $0, (%rax) +; CHECK-NEXT: addq $4, %rax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: jne .LBB1_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq +entry: + %a = alloca <10000 x i64> + br label %loop + +loop: + %phi = phi ptr [ %a, %entry ], [ %gep, %loop ] + store i64 0, ptr %phi + %gep = getelementptr i8, 
ptr %phi, i64 4 + br i1 %c, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-null-global.s b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-null-global.s new file mode 100644 index 0000000000000..22ccc1630c63d --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-null-global.s @@ -0,0 +1,34 @@ +# RUN: llvm-mc -triple=arm64e-apple-macosx -filetype=obj -o %t.o %s +# RUN: llvm-jitlink %t.o +# +# REQUIRES: system-darwin && host=arm64{{.*}} +# +# Check that arm64e ptrauth pass preserves nulls. +# +# Testcase derived from: +# extern void __attribute__((weak_import)) f(void); +# void (*p) = &f; +# +# int main(int argc, char *argv[]) { +# return p ? 1 : 0; +# } + + .section __TEXT,__text,regular,pure_instructions + .globl _main + .p2align 2 +_main: + adrp x8, _p@PAGE + ldr x8, [x8, _p@PAGEOFF] + cmp x8, #0 + cset w0, ne + ret + + .section __DATA,__data + .globl _p + .p2align 3, 0x0 +_p: + .quad _f@AUTH(ia,0) + + .weak_reference _f + .weak_reference l_f.ptrauth +.subsections_via_symbols diff --git a/llvm/test/Instrumentation/AddressSanitizer/X86/bug_124238.ll b/llvm/test/Instrumentation/AddressSanitizer/X86/bug_124238.ll new file mode 100644 index 0000000000000..ce82bc48563a0 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/X86/bug_124238.ll @@ -0,0 +1,60 @@ +; RUN: opt -passes=asan %s -S | FileCheck %s + +;; Punt AddressSanitizer::instrumentMemIntrinsics out for MemIntrinsics +;; that need write to unsupported registers on X86 +;; PR124238: https://www.github.com/llvm/llvm-project/issues/124238 + +target triple = "x86_64-unknown-linux-gnu" + +$.str.658906a285b7a0f82dabd9915e07848c = comdat any +@.str = internal constant { [2 x i8], [30 x i8] } { [2 x i8] c"x\00", [30 x i8] zeroinitializer }, comdat($.str.658906a285b7a0f82dabd9915e07848c), align 32 +@0 = private alias { [2 x i8], [30 x i8] }, ptr @.str + +define void @test_memcpy(i64 noundef %addr) 
sanitize_address #0 { +entry: + %addr.addr = alloca i64, align 8 + store i64 %addr, ptr %addr.addr, align 8 + %0 = load i64, ptr %addr.addr, align 8 + %1 = inttoptr i64 %0 to ptr addrspace(257) + call void @llvm.memcpy.p257.p0.i64(ptr addrspace(257) align 1 %1, ptr align 1 @.str, i64 1, i1 false) +; CHECK: llvm.memcpy + %2 = load i64, ptr %addr.addr, align 8 + %3 = inttoptr i64 %2 to ptr addrspace(256) + call void @llvm.memcpy.p256.p0.i64(ptr addrspace(256) align 1 %3, ptr align 1 @.str, i64 1, i1 false) +; CHECK: llvm.memcpy + ret void +} + +define void @test_memset(i64 noundef %addr) sanitize_address #0 { +entry: + %addr.addr = alloca i64, align 8 + store i64 %addr, ptr %addr.addr, align 8 + %0 = load i64, ptr %addr.addr, align 8 + %1 = inttoptr i64 %0 to ptr addrspace(257) + call void @llvm.memset.p257.i64(ptr addrspace(257) align 1 %1, i8 0, i64 1, i1 false) +; CHECK: llvm.memset + %2 = load i64, ptr %addr.addr, align 8 + %3 = inttoptr i64 %2 to ptr addrspace(256) + call void @llvm.memset.p256.i64(ptr addrspace(256) align 1 %3, i8 0, i64 1, i1 false) +; CHECK: llvm.memset + ret void +} + +define void @test_memmove(i64 noundef %addr) sanitize_address #0 { +entry: + %addr.addr = alloca i64, align 8 + store i64 %addr, ptr %addr.addr, align 8 + %0 = load i64, ptr %addr.addr, align 8 + %1 = inttoptr i64 %0 to ptr addrspace(257) + %2 = load i64, ptr %addr.addr, align 8 + %3 = inttoptr i64 %2 to ptr + call void @llvm.memmove.p257.p0.i64(ptr addrspace(257) align 1 %1, ptr align 1 %3, i64 1, i1 false) +; CHECK: llvm.memmove + %4 = load i64, ptr %addr.addr, align 8 + %5 = inttoptr i64 %4 to ptr addrspace(256) + %6 = load i64, ptr %addr.addr, align 8 + %7 = inttoptr i64 %6 to ptr + call void @llvm.memmove.p256.p0.i64(ptr addrspace(256) align 1 %5, ptr align 1 %7, i64 1, i1 false) +; CHECK: llvm.memmove + ret void +} diff --git a/llvm/test/MC/RISCV/rv32xqccmp-invalid.s b/llvm/test/MC/RISCV/rv32xqccmp-invalid.s index 899979cb4c601..74f96f076756c 100644 --- 
a/llvm/test/MC/RISCV/rv32xqccmp-invalid.s +++ b/llvm/test/MC/RISCV/rv32xqccmp-invalid.s @@ -13,23 +13,23 @@ qc.cm.mva01s a1, a2 # CHECK-ERROR: error: invalid register list, {ra, s0-s10} or {x1, x8-x9, x18-x26} is not supported qc.cm.popretz {ra, s0-s10}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [16, 64] qc.cm.popretz {ra, s0-s1}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.push {ra}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.pushfp {ra, s0}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [16, 64] qc.cm.pop {ra, s0-s1}, -32 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.push {ra}, -8 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.pushfp {ra, s0}, -12 -# 
CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [16, 64] qc.cm.pop {ra, s0-s1}, -40 diff --git a/llvm/test/MC/RISCV/rv32zcmp-invalid.s b/llvm/test/MC/RISCV/rv32zcmp-invalid.s index 0720a74a9b5c2..4115333fc738b 100644 --- a/llvm/test/MC/RISCV/rv32zcmp-invalid.s +++ b/llvm/test/MC/RISCV/rv32zcmp-invalid.s @@ -13,17 +13,17 @@ cm.mva01s a1, a2 # CHECK-ERROR: error: invalid register list, {ra, s0-s10} or {x1, x8-x9, x18-x26} is not supported cm.popretz {ra, s0-s10}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [16, 64] cm.popretz {ra, s0-s1}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] cm.push {ra}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [16, 64] cm.pop {ra, s0-s1}, -32 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] cm.push {ra}, -8 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register 
list must be a multiple of 16 bytes in the range [16, 64] cm.pop {ra, s0-s1}, -40 diff --git a/llvm/test/MC/RISCV/rv64xqccmp-invalid.s b/llvm/test/MC/RISCV/rv64xqccmp-invalid.s index e922572a44749..ba0ed29afa108 100644 --- a/llvm/test/MC/RISCV/rv64xqccmp-invalid.s +++ b/llvm/test/MC/RISCV/rv64xqccmp-invalid.s @@ -13,23 +13,23 @@ qc.cm.mva01s a1, a2 # CHECK-ERROR: error: invalid register list, {ra, s0-s10} or {x1, x8-x9, x18-x26} is not supported qc.cm.popretz {ra, s0-s10}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] qc.cm.popretz {ra, s0-s1}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.push {ra}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.pushfp {ra, s0}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] qc.cm.pop {ra, s0-s1}, -32 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.push {ra}, -15 -# CHECK-ERROR: error: stack adjustment is invalid for this 
instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.push {ra, s0}, -15 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] qc.cm.pop {ra, s0-s1}, -33 diff --git a/llvm/test/MC/RISCV/rv64zcmp-invalid.s b/llvm/test/MC/RISCV/rv64zcmp-invalid.s index 7e10ab5c2f902..804234d2c11e6 100644 --- a/llvm/test/MC/RISCV/rv64zcmp-invalid.s +++ b/llvm/test/MC/RISCV/rv64zcmp-invalid.s @@ -13,17 +13,17 @@ cm.mva01s a1, a2 # CHECK-ERROR: error: invalid register list, {ra, s0-s10} or {x1, x8-x9, x18-x26} is not supported cm.popretz {ra, s0-s10}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] cm.popretz {ra, s0-s1}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] cm.push {ra}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] cm.pop {ra, s0-s1}, -32 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in 
the range [-64, -16] cm.push {ra}, -15 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] cm.pop {ra, s0-s1}, -33 diff --git a/llvm/test/Other/print-inst-addrs.ll b/llvm/test/Other/print-inst-addrs.ll new file mode 100644 index 0000000000000..5907b30f0f12c --- /dev/null +++ b/llvm/test/Other/print-inst-addrs.ll @@ -0,0 +1,6 @@ +; RUN: opt -S -print-inst-addrs %s | FileCheck %s + +define void @foo() { + ; CHECK: ret void ; 0x + ret void +} diff --git a/llvm/test/Other/print-inst-debug-locs.ll b/llvm/test/Other/print-inst-debug-locs.ll new file mode 100644 index 0000000000000..93210527e27a7 --- /dev/null +++ b/llvm/test/Other/print-inst-debug-locs.ll @@ -0,0 +1,20 @@ +; RUN: opt -S -print-inst-debug-locs < %s | FileCheck %s + +define weak i32 @foo(i32 %a, i32 %b) !dbg !3 { +entry: + ; CHECK: call {{.*}} ; foo.c:52 + %sum = call i32 @fastadd(i32 %a, i32 %b), !dbg !DILocation(line: 52, scope: !3) + ; CHECK: ret {{.*}} ; foo.c:53 + ret i32 %sum, !dbg !DILocation(line: 53, scope: !3) +} + +declare i32 @fastadd(i32, i32) + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} + +!llvm.dbg.cu = !{!1} +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, emissionKind: FullDebug) +!2 = !DIFile(filename: "foo.c", directory: "/path/to/dir") +!3 = distinct !DISubprogram(file: !2, scope: !2, line: 51, name: "foo", type: !4, unit: !1) +!4 = !DISubroutineType(types: !{}) diff --git a/llvm/test/Other/print-mi-addrs.ll b/llvm/test/Other/print-mi-addrs.ll new file mode 100644 index 0000000000000..5be006d9df282 --- /dev/null +++ b/llvm/test/Other/print-mi-addrs.ll @@ -0,0 +1,11 @@ +; RUN: llc -print-after=slotindexes -print-mi-addrs < %s 2>&1 | FileCheck %s +; REQUIRES: default_triple + +; CHECK: IR Dump {{.*}} +; CHECK: # Machine code for function 
foo{{.*}} + +define void @foo() { + ; CHECK: ; 0x + ret void +} + diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 2e67b637eebf6..b9d2aaf972b23 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -337,9 +337,9 @@ define void @nc4(ptr %p) { ret void } -define void @nc5(ptr %f, ptr %p) { -; CHECK-LABEL: define {{[^@]+}}@nc5 -; CHECK-SAME: (ptr nofree noundef nonnull captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +define void @callsite_readonly_nounwind_not_willreturn(ptr %f, ptr %p) { +; CHECK-LABEL: define {{[^@]+}}@callsite_readonly_nounwind_not_willreturn +; CHECK-SAME: (ptr nofree noundef nonnull captures(none) [[F:%.*]], ptr [[P:%.*]]) { ; CHECK-NEXT: call void [[F]](ptr captures(none) [[P]]) ; CHECK-NEXT: ret void ; @@ -348,6 +348,17 @@ define void @nc5(ptr %f, ptr %p) { ret void } +define void @callsite_readonly_nounwind_willreturn(ptr %f, ptr %p) { +; CHECK-LABEL: define {{[^@]+}}@callsite_readonly_nounwind_willreturn +; CHECK-SAME: (ptr nofree noundef nonnull captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +; CHECK-NEXT: call void [[F]](ptr captures(none) [[P]]) +; CHECK-NEXT: ret void +; + call void %f(ptr %p) readonly nounwind willreturn + call void %f(ptr nocapture %p) + ret void +} + ; It would be acceptable to add readnone to %y1_1 and %y1_2. 
define void @test1_1(ptr %x1_1, ptr %y1_1, i1 %c) { ; TUNIT: Function Attrs: nofree nosync nounwind memory(write) diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index cc23c435d96c6..dc1fdb6100aeb 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -46,7 +46,7 @@ define void @c3(ptr %q) { ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) ; ATTRIBUTOR-LABEL: define void @c3 ; ATTRIBUTOR-SAME: (ptr nofree writeonly [[Q:%.*]]) #[[ATTR1]] { -; ATTRIBUTOR-NEXT: call void @c2(ptr nofree writeonly [[Q]]) #[[ATTR16:[0-9]+]] +; ATTRIBUTOR-NEXT: call void @c2(ptr nofree writeonly [[Q]]) #[[ATTR18:[0-9]+]] ; ATTRIBUTOR-NEXT: ret void ; call void @c2(ptr %q) @@ -232,7 +232,7 @@ define i1 @c7(ptr %q, i32 %bitno) { ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read) ; ATTRIBUTOR-LABEL: define i1 @c7 ; ATTRIBUTOR-SAME: (ptr nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR-NEXT: [[PTR:%.*]] = call ptr @lookup_bit(ptr nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR17:[0-9]+]] +; ATTRIBUTOR-NEXT: [[PTR:%.*]] = call ptr @lookup_bit(ptr nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR19:[0-9]+]] ; ATTRIBUTOR-NEXT: [[VAL:%.*]] = load i1, ptr [[PTR]], align 1 ; ATTRIBUTOR-NEXT: ret i1 [[VAL]] ; @@ -337,7 +337,7 @@ define void @nc2(ptr %p, ptr %q) { ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn ; ATTRIBUTOR-LABEL: define void @nc2 ; ATTRIBUTOR-SAME: (ptr nofree captures(none) [[P:%.*]], ptr nofree [[Q:%.*]]) #[[ATTR5]] { -; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call i32 @nc1(ptr nofree [[Q]], ptr nofree captures(none) [[P]], i1 false) #[[ATTR18:[0-9]+]] +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call i32 @nc1(ptr nofree [[Q]], ptr nofree captures(none) [[P]], i1 false) #[[ATTR20:[0-9]+]] ; ATTRIBUTOR-NEXT: ret void ; 
%1 = call i32 @nc1(ptr %q, ptr %p, i1 0) ; [#uses=0] @@ -360,33 +360,51 @@ define void @nc3(ptr %p) { ret void } -declare void @external(ptr) readonly nounwind -define void @nc4(ptr %p) { +declare void @external_not_willreturn(ptr) readonly nounwind +define void @readonly_nounwind_not_willreturn(ptr %p) { ; FNATTRS: Function Attrs: nofree nounwind memory(read) -; FNATTRS-LABEL: define void @nc4 -; FNATTRS-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR9:[0-9]+]] { -; FNATTRS-NEXT: call void @external(ptr [[P]]) +; FNATTRS-LABEL: define void @readonly_nounwind_not_willreturn +; FNATTRS-SAME: (ptr readonly [[P:%.*]]) #[[ATTR9:[0-9]+]] { +; FNATTRS-NEXT: call void @external_not_willreturn(ptr [[P]]) ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nosync nounwind memory(read) -; ATTRIBUTOR-LABEL: define void @nc4 +; ATTRIBUTOR-LABEL: define void @readonly_nounwind_not_willreturn ; ATTRIBUTOR-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR7:[0-9]+]] { -; ATTRIBUTOR-NEXT: call void @external(ptr readonly captures(none) [[P]]) #[[ATTR4]] +; ATTRIBUTOR-NEXT: call void @external_not_willreturn(ptr readonly captures(none) [[P]]) #[[ATTR4]] ; ATTRIBUTOR-NEXT: ret void ; - call void @external(ptr %p) + call void @external_not_willreturn(ptr %p) ret void } -define void @nc5(ptr %f, ptr %p) { -; FNATTRS-LABEL: define void @nc5 -; FNATTRS-SAME: (ptr readonly captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +declare void @external_willreturn(ptr) readonly nounwind willreturn +define void @readonly_nounwind_willreturn(ptr %p) { +; FNATTRS: Function Attrs: mustprogress nofree nounwind willreturn memory(read) +; FNATTRS-LABEL: define void @readonly_nounwind_willreturn +; FNATTRS-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR11:[0-9]+]] { +; FNATTRS-NEXT: call void @external_willreturn(ptr [[P]]) +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nosync nounwind willreturn memory(read) +; ATTRIBUTOR-LABEL: define void 
@readonly_nounwind_willreturn +; ATTRIBUTOR-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR9:[0-9]+]] { +; ATTRIBUTOR-NEXT: call void @external_willreturn(ptr readonly captures(none) [[P]]) #[[ATTR21:[0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + call void @external_willreturn(ptr %p) + ret void +} + +define void @callsite_readonly_nounwind_not_willreturn(ptr %f, ptr %p) { +; FNATTRS-LABEL: define void @callsite_readonly_nounwind_not_willreturn +; FNATTRS-SAME: (ptr readonly captures(none) [[F:%.*]], ptr [[P:%.*]]) { ; FNATTRS-NEXT: call void [[F]](ptr [[P]]) #[[ATTR8:[0-9]+]] ; FNATTRS-NEXT: call void [[F]](ptr captures(none) [[P]]) ; FNATTRS-NEXT: ret void ; -; ATTRIBUTOR-LABEL: define void @nc5 -; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +; ATTRIBUTOR-LABEL: define void @callsite_readonly_nounwind_not_willreturn +; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[F:%.*]], ptr [[P:%.*]]) { ; ATTRIBUTOR-NEXT: call void [[F]](ptr [[P]]) #[[ATTR6:[0-9]+]] ; ATTRIBUTOR-NEXT: call void [[F]](ptr captures(none) [[P]]) ; ATTRIBUTOR-NEXT: ret void @@ -396,19 +414,71 @@ define void @nc5(ptr %f, ptr %p) { ret void } +define void @callsite_readonly_nounwind_willreturn(ptr %f, ptr %p) { +; FNATTRS-LABEL: define void @callsite_readonly_nounwind_willreturn +; FNATTRS-SAME: (ptr readonly captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +; FNATTRS-NEXT: call void [[F]](ptr [[P]]) #[[ATTR10:[0-9]+]] +; FNATTRS-NEXT: call void [[F]](ptr captures(none) [[P]]) +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define void @callsite_readonly_nounwind_willreturn +; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +; ATTRIBUTOR-NEXT: call void [[F]](ptr [[P]]) #[[ATTR8:[0-9]+]] +; ATTRIBUTOR-NEXT: call void [[F]](ptr captures(none) [[P]]) +; ATTRIBUTOR-NEXT: ret void +; + call void %f(ptr %p) readonly nounwind willreturn + call void %f(ptr nocapture %p) + ret void +} + 
+define void @self_readonly_nounwind_not_willreturn(ptr %p) readonly nounwind { +; FNATTRS: Function Attrs: nofree nounwind memory(read) +; FNATTRS-LABEL: define void @self_readonly_nounwind_not_willreturn +; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR9]] { +; FNATTRS-NEXT: call void @capture(ptr [[P]]) +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: nosync nounwind memory(read) +; ATTRIBUTOR-LABEL: define void @self_readonly_nounwind_not_willreturn +; ATTRIBUTOR-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR7]] { +; ATTRIBUTOR-NEXT: call void @capture(ptr [[P]]) +; ATTRIBUTOR-NEXT: ret void +; + call void @capture(ptr %p) + ret void +} + +define void @self_readonly_nounwind_willreturn(ptr %p) readonly nounwind willreturn { +; FNATTRS: Function Attrs: mustprogress nofree nounwind willreturn memory(read) +; FNATTRS-LABEL: define void @self_readonly_nounwind_willreturn +; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR11]] { +; FNATTRS-NEXT: call void @capture(ptr [[P]]) +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nosync nounwind willreturn memory(read) +; ATTRIBUTOR-LABEL: define void @self_readonly_nounwind_willreturn +; ATTRIBUTOR-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR9]] { +; ATTRIBUTOR-NEXT: call void @capture(ptr [[P]]) +; ATTRIBUTOR-NEXT: ret void +; + call void @capture(ptr %p) + ret void +} + ; It would be acceptable to add readnone to %y1_1 and %y1_2. 
define void @test1_1(ptr %x1_1, ptr %y1_1, i1 %c) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test1_1 -; FNATTRS-SAME: (ptr readnone captures(none) [[X1_1:%.*]], ptr [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR10:[0-9]+]] { +; FNATTRS-SAME: (ptr readnone captures(none) [[X1_1:%.*]], ptr [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR12:[0-9]+]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = call ptr @test1_2(ptr [[X1_1]], ptr [[Y1_1]], i1 [[C]]) ; FNATTRS-NEXT: store ptr null, ptr @g, align 8 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define void @test1_1 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X1_1:%.*]], ptr nofree readnone captures(none) [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR8:[0-9]+]] { -; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call ptr @test1_2(ptr nofree readnone captures(none) [[X1_1]], ptr nofree readnone [[Y1_1]], i1 [[C]]) #[[ATTR8]] +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X1_1:%.*]], ptr nofree readnone captures(none) [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR10:[0-9]+]] { +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call ptr @test1_2(ptr nofree readnone captures(none) [[X1_1]], ptr nofree readnone [[Y1_1]], i1 [[C]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -420,7 +490,7 @@ define void @test1_1(ptr %x1_1, ptr %y1_1, i1 %c) { define ptr @test1_2(ptr %x1_2, ptr %y1_2, i1 %c) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define ptr @test1_2 -; FNATTRS-SAME: (ptr readnone captures(none) [[X1_2:%.*]], ptr returned [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (ptr readnone captures(none) [[X1_2:%.*]], ptr returned [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; FNATTRS: t: ; FNATTRS-NEXT: call void @test1_1(ptr [[X1_2]], ptr 
[[Y1_2]], i1 [[C]]) @@ -431,10 +501,10 @@ define ptr @test1_2(ptr %x1_2, ptr %y1_2, i1 %c) { ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define ptr @test1_2 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X1_2:%.*]], ptr nofree readnone [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X1_2:%.*]], ptr nofree readnone [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { ; ATTRIBUTOR-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; ATTRIBUTOR: t: -; ATTRIBUTOR-NEXT: call void @test1_1(ptr nofree readnone captures(none) [[X1_2]], ptr nofree readnone captures(none) [[Y1_2]], i1 [[C]]) #[[ATTR8]] +; ATTRIBUTOR-NEXT: call void @test1_1(ptr nofree readnone captures(none) [[X1_2]], ptr nofree readnone captures(none) [[Y1_2]], i1 [[C]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: br label [[F]] ; ATTRIBUTOR: f: @@ -452,15 +522,15 @@ f: define void @test2(ptr %x2) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test2 -; FNATTRS-SAME: (ptr readnone captures(none) [[X2:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (ptr readnone captures(none) [[X2:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: call void @test2(ptr [[X2]]) ; FNATTRS-NEXT: store ptr null, ptr @g, align 8 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define void @test2 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X2:%.*]]) #[[ATTR8]] { -; ATTRIBUTOR-NEXT: call void @test2(ptr nofree readnone captures(none) [[X2]]) #[[ATTR8]] +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X2:%.*]]) #[[ATTR10]] { +; ATTRIBUTOR-NEXT: call void @test2(ptr nofree readnone captures(none) [[X2]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -472,15 +542,15 @@ define void @test2(ptr %x2) { define 
void @test3(ptr %x3, ptr %y3, ptr %z3) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test3 -; FNATTRS-SAME: (ptr readnone captures(none) [[X3:%.*]], ptr readnone captures(none) [[Y3:%.*]], ptr readnone captures(none) [[Z3:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (ptr readnone captures(none) [[X3:%.*]], ptr readnone captures(none) [[Y3:%.*]], ptr readnone captures(none) [[Z3:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: call void @test3(ptr [[Z3]], ptr [[Y3]], ptr [[X3]]) ; FNATTRS-NEXT: store ptr null, ptr @g, align 8 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define void @test3 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X3:%.*]], ptr nofree readnone captures(none) [[Y3:%.*]], ptr nofree readnone captures(none) [[Z3:%.*]]) #[[ATTR8]] { -; ATTRIBUTOR-NEXT: call void @test3(ptr nofree readnone captures(none) [[Z3]], ptr nofree readnone captures(none) [[Y3]], ptr nofree readnone captures(none) [[X3]]) #[[ATTR8]] +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X3:%.*]], ptr nofree readnone captures(none) [[Y3:%.*]], ptr nofree readnone captures(none) [[Z3:%.*]]) #[[ATTR10]] { +; ATTRIBUTOR-NEXT: call void @test3(ptr nofree readnone captures(none) [[Z3]], ptr nofree readnone captures(none) [[Y3]], ptr nofree readnone captures(none) [[X3]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -492,15 +562,15 @@ define void @test3(ptr %x3, ptr %y3, ptr %z3) { define void @test4_1(ptr %x4_1, i1 %c) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test4_1 -; FNATTRS-SAME: (ptr [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (ptr [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = call ptr @test4_2(ptr [[X4_1]], ptr [[X4_1]], ptr [[X4_1]], i1 [[C]]) ; 
FNATTRS-NEXT: store ptr null, ptr @g, align 8 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define void @test4_1 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { -; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call ptr @test4_2(ptr nofree readnone captures(none) [[X4_1]], ptr nofree readnone [[X4_1]], ptr nofree readnone captures(none) [[X4_1]], i1 [[C]]) #[[ATTR8]] +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call ptr @test4_2(ptr nofree readnone captures(none) [[X4_1]], ptr nofree readnone [[X4_1]], ptr nofree readnone captures(none) [[X4_1]], i1 [[C]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -512,7 +582,7 @@ define void @test4_1(ptr %x4_1, i1 %c) { define ptr @test4_2(ptr %x4_2, ptr %y4_2, ptr %z4_2, i1 %c) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define ptr @test4_2 -; FNATTRS-SAME: (ptr readnone captures(none) [[X4_2:%.*]], ptr readnone returned captures(ret: address, provenance) [[Y4_2:%.*]], ptr readnone captures(none) [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (ptr readnone captures(none) [[X4_2:%.*]], ptr readnone returned captures(ret: address, provenance) [[Y4_2:%.*]], ptr readnone captures(none) [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; FNATTRS: t: ; FNATTRS-NEXT: call void @test4_1(ptr null, i1 [[C]]) @@ -523,10 +593,10 @@ define ptr @test4_2(ptr %x4_2, ptr %y4_2, ptr %z4_2, i1 %c) { ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define ptr @test4_2 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X4_2:%.*]], ptr nofree readnone [[Y4_2:%.*]], ptr nofree readnone captures(none) [[Z4_2:%.*]], i1 [[C:%.*]]) 
#[[ATTR8]] { +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X4_2:%.*]], ptr nofree readnone [[Y4_2:%.*]], ptr nofree readnone captures(none) [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { ; ATTRIBUTOR-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; ATTRIBUTOR: t: -; ATTRIBUTOR-NEXT: call void @test4_1(ptr nofree readnone null, i1 [[C]]) #[[ATTR8]] +; ATTRIBUTOR-NEXT: call void @test4_1(ptr nofree readnone null, i1 [[C]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: br label [[F]] ; ATTRIBUTOR: f: @@ -578,13 +648,13 @@ define void @test6_2(ptr %x6_2, ptr %y6_2, ptr %z6_2) { define void @test_cmpxchg(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; FNATTRS-LABEL: define void @test_cmpxchg -; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR11:[0-9]+]] { +; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR13:[0-9]+]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i32 0, i32 1 acquire monotonic, align 4 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; ATTRIBUTOR-LABEL: define void @test_cmpxchg -; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]]) #[[ATTR9:[0-9]+]] { +; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]]) #[[ATTR11:[0-9]+]] { ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i32 0, i32 1 acquire monotonic, align 4 ; ATTRIBUTOR-NEXT: ret void ; @@ -595,13 +665,13 @@ define void @test_cmpxchg(ptr %p) { define void @test_cmpxchg_ptr(ptr %p, ptr %q) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; FNATTRS-LABEL: define void @test_cmpxchg_ptr -; FNATTRS-SAME: (ptr captures(none) [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR11]] { +; FNATTRS-SAME: (ptr captures(none) [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR13]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], ptr null, ptr [[Q]] 
acquire monotonic, align 8 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; ATTRIBUTOR-LABEL: define void @test_cmpxchg_ptr -; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]], ptr nofree [[Q:%.*]]) #[[ATTR9]] { +; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]], ptr nofree [[Q:%.*]]) #[[ATTR11]] { ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], ptr null, ptr [[Q]] acquire monotonic, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -612,13 +682,13 @@ define void @test_cmpxchg_ptr(ptr %p, ptr %q) { define void @test_atomicrmw(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; FNATTRS-LABEL: define void @test_atomicrmw -; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR11]] { +; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR13]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P]], i32 1 seq_cst, align 4 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; ATTRIBUTOR-LABEL: define void @test_atomicrmw -; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]]) #[[ATTR9]] { +; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]]) #[[ATTR11]] { ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P]], i32 1 seq_cst, align 4 ; ATTRIBUTOR-NEXT: ret void ; @@ -629,7 +699,7 @@ define void @test_atomicrmw(ptr %p) { define void @test_volatile(ptr %x) { ; FNATTRS: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; FNATTRS-LABEL: define void @test_volatile -; FNATTRS-SAME: (ptr [[X:%.*]]) #[[ATTR12:[0-9]+]] { +; FNATTRS-SAME: (ptr [[X:%.*]]) #[[ATTR14:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[X]], i64 1 ; FNATTRS-NEXT: store volatile i32 0, ptr [[GEP]], align 4 @@ -637,7 +707,7 @@ define void 
@test_volatile(ptr %x) { ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; ATTRIBUTOR-LABEL: define void @test_volatile -; ATTRIBUTOR-SAME: (ptr nofree [[X:%.*]]) #[[ATTR9]] { +; ATTRIBUTOR-SAME: (ptr nofree [[X:%.*]]) #[[ATTR11]] { ; ATTRIBUTOR-NEXT: entry: ; ATTRIBUTOR-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[X]], i64 1 ; ATTRIBUTOR-NEXT: store volatile i32 0, ptr [[GEP]], align 4 @@ -652,7 +722,7 @@ entry: define void @nocaptureLaunder(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write, inaccessiblemem: readwrite) ; FNATTRS-LABEL: define void @nocaptureLaunder -; FNATTRS-SAME: (ptr writeonly captures(none) [[P:%.*]]) #[[ATTR13:[0-9]+]] { +; FNATTRS-SAME: (ptr writeonly captures(none) [[P:%.*]]) #[[ATTR15:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) ; FNATTRS-NEXT: store i8 42, ptr [[B]], align 1 @@ -660,9 +730,9 @@ define void @nocaptureLaunder(ptr %p) { ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; ATTRIBUTOR-LABEL: define void @nocaptureLaunder -; ATTRIBUTOR-SAME: (ptr nofree captures(none) [[P:%.*]]) #[[ATTR10:[0-9]+]] { +; ATTRIBUTOR-SAME: (ptr nofree captures(none) [[P:%.*]]) #[[ATTR12:[0-9]+]] { ; ATTRIBUTOR-NEXT: entry: -; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) #[[ATTR19:[0-9]+]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) #[[ATTR22:[0-9]+]] ; ATTRIBUTOR-NEXT: store i8 42, ptr [[B]], align 1 ; ATTRIBUTOR-NEXT: ret void ; @@ -676,7 +746,7 @@ entry: define void @captureLaunder(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: readwrite) ; FNATTRS-LABEL: define void @captureLaunder -; 
FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR14:[0-9]+]] { +; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR16:[0-9]+]] { ; FNATTRS-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) ; FNATTRS-NEXT: store ptr [[B]], ptr @g2, align 8 ; FNATTRS-NEXT: ret void @@ -684,7 +754,7 @@ define void @captureLaunder(ptr %p) { ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn ; ATTRIBUTOR-LABEL: define void @captureLaunder ; ATTRIBUTOR-SAME: (ptr nofree [[P:%.*]]) #[[ATTR5]] { -; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) #[[ATTR19]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) #[[ATTR22]] ; ATTRIBUTOR-NEXT: store ptr [[B]], ptr @g2, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -696,7 +766,7 @@ define void @captureLaunder(ptr %p) { define void @nocaptureStrip(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; FNATTRS-LABEL: define void @nocaptureStrip -; FNATTRS-SAME: (ptr writeonly captures(none) [[P:%.*]]) #[[ATTR15:[0-9]+]] { +; FNATTRS-SAME: (ptr writeonly captures(none) [[P:%.*]]) #[[ATTR17:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]]) ; FNATTRS-NEXT: store i8 42, ptr [[B]], align 1 @@ -704,9 +774,9 @@ define void @nocaptureStrip(ptr %p) { ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; ATTRIBUTOR-LABEL: define void @nocaptureStrip -; ATTRIBUTOR-SAME: (ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR11:[0-9]+]] { +; ATTRIBUTOR-SAME: (ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR13:[0-9]+]] { ; ATTRIBUTOR-NEXT: entry: -; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]]) #[[ATTR17]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]]) #[[ATTR19]] ; ATTRIBUTOR-NEXT: store i8 42, ptr [[B]], 
align 1 ; ATTRIBUTOR-NEXT: ret void ; @@ -728,7 +798,7 @@ define void @captureStrip(ptr %p) { ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) ; ATTRIBUTOR-LABEL: define void @captureStrip ; ATTRIBUTOR-SAME: (ptr nofree writeonly [[P:%.*]]) #[[ATTR1]] { -; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]]) #[[ATTR17]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]]) #[[ATTR19]] ; ATTRIBUTOR-NEXT: store ptr [[B]], ptr @g3, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -870,14 +940,14 @@ define i1 @notInboundsGEPICmp(ptr %x) { define i1 @inboundsGEPICmpNullPointerDefined(ptr %x) null_pointer_is_valid { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; FNATTRS-LABEL: define i1 @inboundsGEPICmpNullPointerDefined -; FNATTRS-SAME: (ptr readnone captures(address) [[X:%.*]]) #[[ATTR16:[0-9]+]] { +; FNATTRS-SAME: (ptr readnone captures(address) [[X:%.*]]) #[[ATTR18:[0-9]+]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[X]], i32 5 ; FNATTRS-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null ; FNATTRS-NEXT: ret i1 [[TMP2]] ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; ATTRIBUTOR-LABEL: define i1 @inboundsGEPICmpNullPointerDefined -; ATTRIBUTOR-SAME: (ptr nofree readnone [[X:%.*]]) #[[ATTR12:[0-9]+]] { +; ATTRIBUTOR-SAME: (ptr nofree readnone [[X:%.*]]) #[[ATTR14:[0-9]+]] { ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[X]], i32 5 ; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null ; ATTRIBUTOR-NEXT: ret i1 [[TMP2]] @@ -907,13 +977,13 @@ define i1 @nocaptureDereferenceableOrNullICmp(ptr dereferenceable_or_null(4) %x) define i1 @captureDereferenceableOrNullICmp(ptr dereferenceable_or_null(4) %x) null_pointer_is_valid { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind 
null_pointer_is_valid willreturn memory(none) ; FNATTRS-LABEL: define noundef i1 @captureDereferenceableOrNullICmp -; FNATTRS-SAME: (ptr readnone captures(address_is_null) dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR16]] { +; FNATTRS-SAME: (ptr readnone captures(address_is_null) dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR18]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = icmp eq ptr [[X]], null ; FNATTRS-NEXT: ret i1 [[TMP1]] ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; ATTRIBUTOR-LABEL: define i1 @captureDereferenceableOrNullICmp -; ATTRIBUTOR-SAME: (ptr nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR12]] { +; ATTRIBUTOR-SAME: (ptr nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR14]] { ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = icmp eq ptr [[X]], null ; ATTRIBUTOR-NEXT: ret i1 [[TMP1]] ; @@ -962,14 +1032,14 @@ define void @recurse_fptr(ptr %f, ptr %p) { define void @readnone_indirec(ptr %f, ptr %p) { ; FNATTRS: Function Attrs: nofree nosync memory(none) ; FNATTRS-LABEL: define void @readnone_indirec -; FNATTRS-SAME: (ptr readonly captures(none) [[F:%.*]], ptr readnone [[P:%.*]]) #[[ATTR17:[0-9]+]] { -; FNATTRS-NEXT: call void [[F]](ptr [[P]]) #[[ATTR20:[0-9]+]] +; FNATTRS-SAME: (ptr readonly captures(none) [[F:%.*]], ptr readnone [[P:%.*]]) #[[ATTR19:[0-9]+]] { +; FNATTRS-NEXT: call void [[F]](ptr [[P]]) #[[ATTR22:[0-9]+]] ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nosync memory(none) ; ATTRIBUTOR-LABEL: define void @readnone_indirec -; ATTRIBUTOR-SAME: (ptr nofree nonnull readnone captures(none) [[F:%.*]], ptr readnone [[P:%.*]]) #[[ATTR13:[0-9]+]] { -; ATTRIBUTOR-NEXT: call void [[F]](ptr [[P]]) #[[ATTR20:[0-9]+]] +; ATTRIBUTOR-SAME: (ptr nofree nonnull readnone captures(none) [[F:%.*]], ptr readnone [[P:%.*]]) #[[ATTR15:[0-9]+]] { +; ATTRIBUTOR-NEXT: call void [[F]](ptr [[P]]) #[[ATTR23:[0-9]+]] ; ATTRIBUTOR-NEXT: ret void ; call void %f(ptr %p) readnone @@ 
-1062,7 +1132,7 @@ define ptr @captures_used_ret(ptr %p) { define ptr @scc_capture_via_ret(i1 %c, ptr %p) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define ptr @scc_capture_via_ret -; FNATTRS-SAME: (i1 [[C:%.*]], ptr [[P:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (i1 [[C:%.*]], ptr [[P:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]] ; FNATTRS: if: ; FNATTRS-NEXT: [[C_NOT:%.*]] = xor i1 [[C]], true @@ -1074,11 +1144,11 @@ define ptr @scc_capture_via_ret(i1 %c, ptr %p) { ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define ptr @scc_capture_via_ret -; ATTRIBUTOR-SAME: (i1 [[C:%.*]], ptr nofree [[P:%.*]]) #[[ATTR8]] { +; ATTRIBUTOR-SAME: (i1 [[C:%.*]], ptr nofree [[P:%.*]]) #[[ATTR10]] { ; ATTRIBUTOR-NEXT: br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]] ; ATTRIBUTOR: if: ; ATTRIBUTOR-NEXT: [[C_NOT:%.*]] = xor i1 [[C]], true -; ATTRIBUTOR-NEXT: [[RET:%.*]] = call ptr @scc_capture_via_ret(i1 [[C_NOT]], ptr nofree [[P]]) #[[ATTR8]] +; ATTRIBUTOR-NEXT: [[RET:%.*]] = call ptr @scc_capture_via_ret(i1 [[C_NOT]], ptr nofree [[P]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr [[RET]], ptr @g, align 8 ; ATTRIBUTOR-NEXT: ret ptr [[RET]] ; ATTRIBUTOR: else: diff --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll index 94093568419af..483b560ece6c8 100644 --- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -1029,10 +1029,10 @@ define ptr @g1() { ret ptr %c } -declare void @use_i32_ptr(ptr) readnone nounwind +declare void @use_i32_ptr(ptr) readnone nounwind willreturn define internal void @called_by_weak(ptr %a) { ; FNATTRS-LABEL: define internal void @called_by_weak( -; FNATTRS-SAME: ptr readnone captures(none) [[A:%.*]]) #[[ATTR1]] { +; FNATTRS-SAME: ptr readnone captures(none) [[A:%.*]]) #[[ATTR10:[0-9]+]] { ; FNATTRS-NEXT: 
call void @use_i32_ptr(ptr [[A]]) ; FNATTRS-NEXT: ret void ; @@ -1064,7 +1064,7 @@ define weak_odr void @weak_caller(ptr nonnull %a) { ; Expect nonnull define internal void @control(ptr dereferenceable(4) %a) { ; FNATTRS-LABEL: define internal void @control( -; FNATTRS-SAME: ptr readnone captures(none) dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +; FNATTRS-SAME: ptr readnone captures(none) dereferenceable(4) [[A:%.*]]) #[[ATTR10]] { ; FNATTRS-NEXT: call void @use_i32_ptr(ptr [[A]]) ; FNATTRS-NEXT: ret void ; @@ -1079,7 +1079,7 @@ define internal void @control(ptr dereferenceable(4) %a) { ; Avoid nonnull as we do not touch naked functions define internal void @naked(ptr dereferenceable(4) %a) naked { ; FNATTRS-LABEL: define internal void @naked( -; FNATTRS-SAME: ptr dereferenceable(4) [[A:%.*]]) #[[ATTR10:[0-9]+]] { +; FNATTRS-SAME: ptr dereferenceable(4) [[A:%.*]]) #[[ATTR11:[0-9]+]] { ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR-LABEL: define internal void @naked( @@ -1091,7 +1091,7 @@ define internal void @naked(ptr dereferenceable(4) %a) naked { ; Avoid nonnull as we do not touch optnone define internal void @optnone(ptr dereferenceable(4) %a) optnone noinline { ; FNATTRS-LABEL: define internal void @optnone( -; FNATTRS-SAME: ptr dereferenceable(4) [[A:%.*]]) #[[ATTR11:[0-9]+]] { +; FNATTRS-SAME: ptr dereferenceable(4) [[A:%.*]]) #[[ATTR12:[0-9]+]] { ; FNATTRS-NEXT: call void @use_i32_ptr(ptr [[A]]) ; FNATTRS-NEXT: ret void ; diff --git a/llvm/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll b/llvm/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll index fd98b71cb5562..e2e3603e9cb43 100644 --- a/llvm/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll +++ b/llvm/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll @@ -6,7 +6,7 @@ declare void @llvm.va_start(ptr) declare void @llvm.va_end(ptr) -define void @va_func(ptr readonly %b, ...) readonly nounwind { +define void @va_func(ptr readonly %b, ...) 
readonly nounwind willreturn { ; CHECK-LABEL: define void @va_func(ptr readonly captures(none) %b, ...) entry: %valist = alloca i8 diff --git a/llvm/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll b/llvm/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll index 506ad7ce6cd35..95a3b5cbfcd10 100644 --- a/llvm/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll +++ b/llvm/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll @@ -4,7 +4,6 @@ ; This is invalid as it bypasses the check for %m.0.ph==null in bb4. ; ModuleID = 'mbuf.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin9.6" %struct.mbuf = type { ptr, ptr, i32, ptr, i16, i16, i32 } define void @m_adj(ptr %mp, i32 %req_len) nounwind optsize { diff --git a/llvm/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll b/llvm/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll index 3f0475dc79ca2..05d505c603c75 100644 --- a/llvm/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll +++ b/llvm/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll @@ -4,7 +4,6 @@ ; rdar://9429882 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-macosx10.7.0" define i1 @rb_intern(ptr %foo) nounwind ssp { ; CHECK-LABEL: @rb_intern( diff --git a/llvm/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll b/llvm/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll index b2b0216ed8f72..92c01002975de 100644 --- a/llvm/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll +++ b/llvm/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll @@ -26,7 +26,6 @@ ; ^ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-gnu" %struct.desc = type { ptr } %struct.node = type { ptr, ptr } diff --git 
a/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll b/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll index 04de10a5cc1dc..0c172dcfa565e 100644 --- a/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll +++ b/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll @@ -1,7 +1,6 @@ ; RUN: opt -S -passes=gvn -enable-load-pre < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" %ArrayImpl = type { i64, ptr addrspace(100), [1 x i64], [1 x i64], [1 x i64], i64, i64, ptr addrspace(100), ptr addrspace(100), i8, i64 } diff --git a/llvm/test/Transforms/GVN/PRE/2018-06-08-pre-load-dbgloc-no-null-opt.ll b/llvm/test/Transforms/GVN/PRE/2018-06-08-pre-load-dbgloc-no-null-opt.ll index 2f63ed0016c2b..c238fe880cd5a 100644 --- a/llvm/test/Transforms/GVN/PRE/2018-06-08-pre-load-dbgloc-no-null-opt.ll +++ b/llvm/test/Transforms/GVN/PRE/2018-06-08-pre-load-dbgloc-no-null-opt.ll @@ -26,7 +26,6 @@ ; ^ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-gnu" %struct.desc = type { ptr } %struct.node = type { ptr, ptr } diff --git a/llvm/test/Transforms/GVN/PRE/atomic.ll b/llvm/test/Transforms/GVN/PRE/atomic.ll index e8bf25548ba89..ed530bec22e84 100644 --- a/llvm/test/Transforms/GVN/PRE/atomic.ll +++ b/llvm/test/Transforms/GVN/PRE/atomic.ll @@ -1,8 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -passes=gvn -S < %s | FileCheck %s +; RUN: opt -S -passes=gvn < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-macosx10.7.0" @x = common global i32 0, align 4 @y = common global i32 0, align 4 diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll b/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll index 7028edb4732bd..efbc71b06256d 
100644 --- a/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll +++ b/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=gvn < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin11.0.0" @sortlist = external global [5001 x i32], align 4 diff --git a/llvm/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll b/llvm/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll index 177b8a080bb0a..dde5c225e4ff6 100644 --- a/llvm/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll +++ b/llvm/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll @@ -10,7 +10,6 @@ ; outbuf[outcnt] = bi_buf; ; } target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin7" @outcnt = common global i32 0 ; [#uses=3] define void @bi_windup(ptr %outbuf, i8 zeroext %bi_buf) nounwind { diff --git a/llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll b/llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll index 1f6a5c7a11a89..06a7f11aff14b 100644 --- a/llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll +++ b/llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll @@ -15,7 +15,6 @@ ; void testfunction(A& iter) { A const end; while (iter != end) ++iter; } ; target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin7" %struct.A = type { i32, i32 } define void @_Z12testfunctionR1A(ptr %iter) { diff --git a/llvm/test/Transforms/GVN/PRE/nonintegral.ll b/llvm/test/Transforms/GVN/PRE/nonintegral.ll index 240c985b23580..d989e81b8e76c 100644 --- a/llvm/test/Transforms/GVN/PRE/nonintegral.ll +++ b/llvm/test/Transforms/GVN/PRE/nonintegral.ll @@ -2,7 +2,6 @@ ; RUN: opt -passes=gvn -S < %s | 
FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4" -target triple = "x86_64-unknown-linux-gnu" define void @nipre(ptr noalias %p, ptr noalias %p2, i8 %jmp) { diff --git a/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll b/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll index 6b5211ebc00cc..edadcbead2223 100644 --- a/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll @@ -4,7 +4,6 @@ ; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn" -enable-load-pre=false -S | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-gnu" define double @foo(i32 %stat, i32 %i, ptr %p) { ; CHECK-LABEL: @foo( diff --git a/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll b/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll index 331344b767436..0585781e7985f 100644 --- a/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll @@ -1,7 +1,6 @@ ; RUN: opt -S -passes=gvn -enable-load-pre < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" ; These tests exercise situations when instructions that were first instructions ; with implicit control flow get removed. 
We make sure that after that we don't diff --git a/llvm/test/Transforms/GVN/PRE/rle-phi-translate.ll b/llvm/test/Transforms/GVN/PRE/rle-phi-translate.ll index 519e0ca29a971..2d63344fa79b0 100644 --- a/llvm/test/Transforms/GVN/PRE/rle-phi-translate.ll +++ b/llvm/test/Transforms/GVN/PRE/rle-phi-translate.ll @@ -1,7 +1,6 @@ -; RUN: opt < %s -passes=gvn -S | FileCheck %s +; RUN: opt < %s -S -passes=gvn | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin7" define i32 @test1(ptr %b, ptr %c) nounwind { ; CHECK-LABEL: @test1( diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll new file mode 100644 index 0000000000000..836c739048411 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll @@ -0,0 +1,439 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < %s | FileCheck %s + +define i16 @extract_elt0_v2i16_readfirstlane(<2 x i16> %src) { +; CHECK-LABEL: define i16 @extract_elt0_v2i16_readfirstlane( +; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 0 +; CHECK-NEXT: ret i16 [[ELT]] +; + %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src) + %elt = extractelement <2 x i16> %vec, i32 0 + ret i16 %elt +} + +define i16 @extract_elt0_v1i16_readfirstlane(<1 x i16> %src) { +; CHECK-LABEL: define i16 @extract_elt0_v1i16_readfirstlane( +; CHECK-SAME: <1 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <1 x i16> 
@llvm.amdgcn.readfirstlane.v1i16(<1 x i16> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <1 x i16> [[VEC]], i64 0 +; CHECK-NEXT: ret i16 [[ELT]] +; + %vec = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> %src) + %elt = extractelement <1 x i16> %vec, i32 0 + ret i16 %elt +} + +define i16 @extract_elt1_v2i16_readfirstlane(<2 x i16> %src) { +; CHECK-LABEL: define i16 @extract_elt1_v2i16_readfirstlane( +; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 1 +; CHECK-NEXT: ret i16 [[ELT]] +; + %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src) + %elt = extractelement <2 x i16> %vec, i32 1 + ret i16 %elt +} + +define i16 @extract_elt0_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define i16 @extract_elt0_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 0 +; CHECK-NEXT: ret i16 [[ELT]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %elt = extractelement <4 x i16> %vec, i32 0 + ret i16 %elt +} + +define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define i16 @extract_elt2_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 2 +; CHECK-NEXT: ret i16 [[ELT]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %elt = extractelement <4 x i16> %vec, i32 2 + ret i16 %elt +} + +define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane( +; CHECK-SAME: <4 x 
i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 0, i32 1> + ret <2 x i16> %shuffle +} + +define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 2> + ret <2 x i16> %shuffle +} + +define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 2, i32 3> + ret <2 x i16> %shuffle +} + +define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16>
poison, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 0> + ret <2 x i16> %shuffle +} + +define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 3, i32 2> + ret <2 x i16> %shuffle +} + +define <2 x i16> @extract_elt30_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt30_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 0> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 3, i32 0> + ret <2 x i16> %shuffle +} + +define half @extract_elt0_v2f16_readfirstlane(<2 x half> %src) { +; CHECK-LABEL: define half @extract_elt0_v2f16_readfirstlane( +; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 0 +; CHECK-NEXT: ret half [[ELT]] +; + %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2i16(<2 x half> %src) + %elt = extractelement <2 x half> %vec, i32 0 + ret half %elt +} + +define half
@extract_elt1_v2f16_readfirstlane(<2 x half> %src) { +; CHECK-LABEL: define half @extract_elt1_v2f16_readfirstlane( +; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 1 +; CHECK-NEXT: ret half [[ELT]] +; + %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2i16(<2 x half> %src) + %elt = extractelement <2 x half> %vec, i32 1 + ret half %elt +} + +; Don't break on illegal types +define i8 @extract_elt0_v4i8_readfirstlane(<4 x i8> %src) { +; CHECK-LABEL: define i8 @extract_elt0_v4i8_readfirstlane( +; CHECK-SAME: <4 x i8> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i8> @llvm.amdgcn.readfirstlane.v4i8(<4 x i8> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x i8> [[VEC]], i64 0 +; CHECK-NEXT: ret i8 [[ELT]] +; + %vec = call <4 x i8> @llvm.amdgcn.readfirstlane.v4ii8(<4 x i8> %src) + %elt = extractelement <4 x i8> %vec, i32 0 + ret i8 %elt +} + +; Don't break on illegal types +define i32 @extract_elt0_nxv4i32_readfirstlane( %src) { +; CHECK-LABEL: define i32 @extract_elt0_nxv4i32_readfirstlane( +; CHECK-SAME: [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call @llvm.amdgcn.readfirstlane.nxv2i32( [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement [[VEC]], i64 0 +; CHECK-NEXT: ret i32 [[ELT]] +; + %vec = call @llvm.amdgcn.readfirstlane.nxv2i32( %src) + %elt = extractelement %vec, i32 0 + ret i32 %elt +} + +define i32 @extract_elt0_v2i32_readfirstlane(<2 x i32> %src) { +; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane( +; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0 +; CHECK-NEXT: ret i32 [[ELT]] +; + %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) + %elt = extractelement <2 
x i32> %vec, i32 0 + ret i32 %elt +} + +define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane(<2 x ptr addrspace(3)> %src) { +; CHECK-LABEL: define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane( +; CHECK-SAME: <2 x ptr addrspace(3)> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x ptr addrspace(3)> [[VEC]], i64 0 +; CHECK-NEXT: ret ptr addrspace(3) [[ELT]] +; + %vec = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> %src) + %elt = extractelement <2 x ptr addrspace(3)> %vec, i32 0 + ret ptr addrspace(3) %elt +} + +define i64 @extract_elt0_v2i64_readfirstlane(<2 x i64> %src) { +; CHECK-LABEL: define i64 @extract_elt0_v2i64_readfirstlane( +; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 0 +; CHECK-NEXT: ret i64 [[ELT]] +; + %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src) + %elt = extractelement <2 x i64> %vec, i32 0 + ret i64 %elt +} + +define i64 @extract_elt1_v2i64_readfirstlane(<2 x i64> %src) { +; CHECK-LABEL: define i64 @extract_elt1_v2i64_readfirstlane( +; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 1 +; CHECK-NEXT: ret i64 [[ELT]] +; + %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src) + %elt = extractelement <2 x i64> %vec, i32 1 + ret i64 %elt +} + +define <3 x i16> @extract_elt012_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <3 x i16> @extract_elt012_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> 
@llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> +; CHECK-NEXT: ret <3 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> + ret <3 x i16> %shuffle +} + +define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <3 x i16> @extract_elt123_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> +; CHECK-NEXT: ret <3 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> + ret <3 x i16> %shuffle +} + +define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) { +; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane( +; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> +; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]] +; + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> + ret <3 x i32> %shuffle +} + +define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) { +; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane( +; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> +; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]] +; + %vec 
= call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> + ret <3 x i32> %shuffle +} + +define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane( +; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define <2 x i16> @extract_elt13_v4i16readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt13_v4i16readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> + ret <2 x i16> %shuffle +} + +define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(i32 %src0, i32 %src2) { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0( +; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 1 +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0 + %ins.1 = 
insertelement <4 x i32> %ins.0, i32 %src0, i32 1 + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0, i32 %src2) { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1( +; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0 +; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1 + %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3 + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(i32 %src0, i32 %src2) { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2( +; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret <2 x i32> poison +; + %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0 + %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 2 + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(<2 x i32> %src) convergent { +; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken( +; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR1:[0-9]+]] { +; 
CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]]) [ "convergencectrl"(token [[T]]) ] +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0 +; CHECK-NEXT: ret i32 [[ELT]] +; + %t = call token @llvm.experimental.convergence.entry() + %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) [ "convergencectrl"(token %t) ] + %elt = extractelement <2 x i32> %vec, i32 0 + ret i32 %elt +} + +define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) convergent { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken( +; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0 +; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %t = call token @llvm.experimental.convergence.entry() + %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1 + %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3 + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1) [ "convergencectrl"(token %t) ] + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define i1 @extract_elt0_v2i1_readfirstlane(<2 x i1> %src) { +; CHECK-LABEL: define i1 @extract_elt0_v2i1_readfirstlane( +; CHECK-SAME: <2 x i1> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i1> @llvm.amdgcn.readfirstlane.v2i1(<2 x i1> [[SRC]]) 
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i1> [[VEC]], i64 0 +; CHECK-NEXT: ret i1 [[ELT]] +; + %vec = call <2 x i1> @llvm.amdgcn.readfirstlane.v2i1(<2 x i1> %src) + %elt = extractelement <2 x i1> %vec, i32 0 + ret i1 %elt +} + +define <2 x i1> @extract_elt01_v4i1_readfirstlane(<4 x i1> %src) { +; CHECK-LABEL: define <2 x i1> @extract_elt01_v4i1_readfirstlane( +; CHECK-SAME: <4 x i1> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i1> @llvm.amdgcn.readfirstlane.v4i1(<4 x i1> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i1> [[VEC]], <4 x i1> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i1> [[SHUFFLE]] +; + %vec = call <4 x i1> @llvm.amdgcn.readfirstlane.v4i1(<4 x i1> %src) + %shuffle = shufflevector <4 x i1> %vec, <4 x i1> poison, <2 x i32> + ret <2 x i1> %shuffle +} + +define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v8i32_readfirstlane( +; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src) + %shuffle = shufflevector <8 x i32> %vec, <8 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define <2 x i32> @extract_elt03_v4i32_readfirstlane(<4 x i32> %src) { +; CHECK-LABEL: define <2 x i32> @extract_elt03_v4i32_readfirstlane( +; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> 
%shuffle +} + +define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) { +; CHECK-LABEL: define <3 x i32> @extract_elt124_v8i32_readfirstlane( +; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> +; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]] +; + %vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src) + %shuffle = shufflevector <8 x i32> %vec, <8 x i32> poison, <3 x i32> + ret <3 x i32> %shuffle +} diff --git a/llvm/test/Transforms/JumpThreading/pr62908.ll b/llvm/test/Transforms/JumpThreading/pr62908.ll index 4c389ee040b90..cfb647c509f8e 100644 --- a/llvm/test/Transforms/JumpThreading/pr62908.ll +++ b/llvm/test/Transforms/JumpThreading/pr62908.ll @@ -5,7 +5,18 @@ define i32 @test() { ; CHECK-LABEL: define i32 @test() { -; CHECK-NEXT: end: +; CHECK-NEXT: join.thread: +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: unreachable: +; CHECK-NEXT: [[SH_PROM:%.*]] = zext i32 -1 to i64 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i64 -1, [[SH_PROM]] +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SHL]] to i32 +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[CONV]], [[UNREACHABLE:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[END]], label [[END]] +; CHECK: end: ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll new file mode 100644 index 0000000000000..544ef5c82c7ac --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll @@ -0,0 +1,579 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|modf|extractvalue|store)" --version 5 +; RUN: opt -passes=loop-vectorize 
-mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve < %s -S -o - -debug-only=loop-vectorize 2>%t.1 | FileCheck %s --check-prefix=CHECK +; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve -vector-library=ArmPL < %s -S -o - -debug-only=loop-vectorize 2>%t.2 | FileCheck %s --check-prefix=CHECK-ARMPL +; RUN: FileCheck --input-file=%t.1 --check-prefix=CHECK-COST %s +; RUN: FileCheck --input-file=%t.2 --check-prefix=CHECK-COST-ARMPL %s +; REQUIRES: asserts + +; CHECK-COST-LABEL: sincos_f32 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: sincos_f32 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) +; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias 
writeonly %out_b) { +; CHECK-LABEL: define void @sincos_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @sincos_f32( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 +; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 4 +; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 4 +; 
CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 4 +; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 4 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.sincos.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; CHECK-COST-LABEL: sincos_f64 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: sincos_f64 +; CHECK-COST-ARMPL: LV: Found an estimated cost 
of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val) +; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincos_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @sincos_f64( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { , } 
@llvm.sincos.nxv2f64( [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincos.nxv2f64( [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 +; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 8 +; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 8 +; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 8 +; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 8 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK-ARMPL: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK-ARMPL: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.sincos.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; CHECK-COST-LABEL: predicated_sincos +; 
CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: predicated_sincos +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) +; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @predicated_sincos( +; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[IF_THEN:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } 
[[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[IF_MERGE:.*:]] +; CHECK: [[FOR_END:.*:]] +; +; CHECK-ARMPL-LABEL: define void @predicated_sincos( +; CHECK-ARMPL-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP15:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP15]], 0 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP15]], 1 +; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0( [[TMP16]], ptr [[TMP19:%.*]], i32 4, [[TMP14:%.*]]) +; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0( [[TMP17]], ptr [[TMP21:%.*]], i32 4, [[TMP14]]) +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[IF_THEN:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK-ARMPL: [[IF_MERGE:.*:]] +; CHECK-ARMPL: [[FOR_END:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %if.merge ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %if_cond = fcmp olt float %in_val, %x + br i1 %if_cond, label %if.then, label %if.merge + +if.then: + %call = tail call { float, float } @llvm.sincos.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = 
extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + br label %if.merge + +if.merge: + %iv.next = add nuw nsw i64 %iv, 1 + %cond = icmp slt i64 %iv.next, 1024 + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} + +; CHECK-COST-LABEL: modf_f32 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.modf.f32(float %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: modf_f32 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.modf.f32(float %in_val) +; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 11 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 12 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) + +define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @modf_f32( +; CHECK-SAME: ptr 
noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.modf.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @modf_f32( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.modf.nxv4f32( [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.modf.nxv4f32( [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 +; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 4 +; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 4 +; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 4 +; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 
4 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.modf.f32(float [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.modf.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; CHECK-COST-LABEL: modf_f64 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.modf.f64(double %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: modf_f64 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.modf.f64(double %in_val) +; CHECK-COST-ARMPL: Cost of 
11 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 12 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) + +define void @modf_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @modf_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.modf.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @modf_f64( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.modf.nxv2f64( [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.modf.nxv2f64( [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: 
[[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 +; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 8 +; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 8 +; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 8 +; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 8 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { double, double } @llvm.modf.f64(double [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK-ARMPL: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK-ARMPL: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.modf.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; CHECK-COST-LABEL: sincospi_f32 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincospi.f32(float %in_val) +; CHECK-COST: 
Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: sincospi_f32 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincospi.f32(float %in_val) +; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) + +define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincospi_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 
4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincospi.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @sincospi_f32( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.sincospi.nxv4f32( [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincospi.nxv4f32( [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 +; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 4 +; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 4 +; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 4 +; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 4 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincospi.f32(float [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label 
%for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.sincospi.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; CHECK-COST-LABEL: sincospi_f64 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincospi.f64(double %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: sincospi_f64 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincospi.f64(double %in_val) +; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) + +define void @sincospi_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincospi_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly 
[[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincospi.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @sincospi_f64( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.sincospi.nxv2f64( [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincospi.nxv2f64( [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 +; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 8 +; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 8 +; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 8 +; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 8 +; 
CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { double, double } @llvm.sincospi.f64(double [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK-ARMPL: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK-ARMPL: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.sincospi.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sincos.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sincos.ll deleted file mode 100644 index a7e949838f762..0000000000000 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sincos.ll +++ /dev/null @@ -1,251 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|extractvalue|store)" --version 5 -; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve < %s -S -o - -debug-only=loop-vectorize 2>%t.1 | FileCheck %s --check-prefix=CHECK -; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 
-mattr=+sve -vector-library=ArmPL < %s -S -o - -debug-only=loop-vectorize 2>%t.2 | FileCheck %s --check-prefix=CHECK-ARMPL -; RUN: FileCheck --input-file=%t.1 --check-prefix=CHECK-COST %s -; RUN: FileCheck --input-file=%t.2 --check-prefix=CHECK-COST-ARMPL %s -; REQUIRES: asserts - -; CHECK-COST-LABEL: sincos_f32 -; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) -; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -; CHECK-COST-ARMPL-LABEL: sincos_f32 -; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) -; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @sincos_f32( -; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK: [[ENTRY:.*:]] -; 
CHECK: [[VECTOR_PH:.*:]] -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) -; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 -; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 -; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 -; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK: [[EXIT:.*:]] -; -; CHECK-ARMPL-LABEL: define void @sincos_f32( -; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-ARMPL: [[ENTRY:.*:]] -; CHECK-ARMPL: [[VECTOR_PH:.*:]] -; CHECK-ARMPL: [[VECTOR_BODY:.*:]] -; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_LOAD:%.*]]) -; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_LOAD1:%.*]]) -; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 -; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 -; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 -; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 -; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 4 -; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 4 -; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 4 -; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 4 -; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] -; CHECK-ARMPL: [[SCALAR_PH:.*:]] -; CHECK-ARMPL: [[FOR_BODY:.*:]] -; 
CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK-ARMPL: [[EXIT:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv - %in_val = load float, ptr %arrayidx, align 4 - %call = tail call { float, float } @llvm.sincos.f32(float %in_val) - %extract_a = extractvalue { float, float } %call, 0 - %extract_b = extractvalue { float, float } %call, 1 - %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv - store float %extract_a, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv - store float %extract_b, ptr %arrayidx4, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 - br i1 %exitcond.not, label %exit, label %for.body - -exit: - ret void -} - -; CHECK-COST-LABEL: sincos_f64 -; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val) -; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -; CHECK-COST-ARMPL-LABEL: sincos_f64 -; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val) -; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: 
Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @sincos_f64( -; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_PH:.*:]] -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) -; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 -; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 -; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 -; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 -; CHECK: [[EXIT:.*:]] -; -; CHECK-ARMPL-LABEL: define void @sincos_f64( -; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { -; CHECK-ARMPL: [[ENTRY:.*:]] -; CHECK-ARMPL: [[VECTOR_PH:.*:]] -; CHECK-ARMPL: [[VECTOR_BODY:.*:]] -; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.sincos.nxv2f64( [[WIDE_LOAD:%.*]]) -; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincos.nxv2f64( [[WIDE_LOAD1:%.*]]) -; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 -; CHECK-ARMPL: [[TMP15:%.*]] = 
extractvalue { , } [[TMP13]], 0 -; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 -; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 -; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 8 -; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 8 -; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 8 -; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 8 -; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] -; CHECK-ARMPL: [[SCALAR_PH:.*:]] -; CHECK-ARMPL: [[FOR_BODY:.*:]] -; CHECK-ARMPL: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) -; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK-ARMPL: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK-ARMPL: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 -; CHECK-ARMPL: [[EXIT:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv - %in_val = load double, ptr %arrayidx, align 8 - %call = tail call { double, double } @llvm.sincos.f64(double %in_val) - %extract_a = extractvalue { double, double } %call, 0 - %extract_b = extractvalue { double, double } %call, 1 - %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv - store double %extract_a, ptr %arrayidx2, align 8 - %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv - store double %extract_b, ptr %arrayidx4, align 8 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 - br i1 %exitcond.not, label %exit, label %for.body - -exit: - ret void -} - -; CHECK-COST-LABEL: predicated_sincos -; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) -; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call 
llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -; CHECK-COST-ARMPL-LABEL: predicated_sincos -; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) -; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @predicated_sincos( -; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[IF_THEN:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK: [[IF_MERGE:.*:]] -; CHECK: [[FOR_END:.*:]] -; -; 
CHECK-ARMPL-LABEL: define void @predicated_sincos( -; CHECK-ARMPL-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { -; CHECK-ARMPL: [[ENTRY:.*:]] -; CHECK-ARMPL: [[VECTOR_PH:.*:]] -; CHECK-ARMPL: [[VECTOR_BODY:.*:]] -; CHECK-ARMPL: [[TMP15:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) -; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP15]], 0 -; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP15]], 1 -; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0( [[TMP16]], ptr [[TMP19:%.*]], i32 4, [[TMP14:%.*]]) -; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0( [[TMP17]], ptr [[TMP21:%.*]], i32 4, [[TMP14]]) -; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] -; CHECK-ARMPL: [[SCALAR_PH:.*:]] -; CHECK-ARMPL: [[FOR_BODY:.*:]] -; CHECK-ARMPL: [[IF_THEN:.*:]] -; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK-ARMPL: [[IF_MERGE:.*:]] -; CHECK-ARMPL: [[FOR_END:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ %iv.next, %if.merge ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv - %in_val = load float, ptr %arrayidx, align 4 - %if_cond = fcmp olt float %in_val, %x - br i1 %if_cond, label %if.then, label %if.merge - -if.then: - %call = tail call { float, float } @llvm.sincos.f32(float %in_val) - %extract_a = extractvalue { float, float } %call, 0 - %extract_b = extractvalue { float, float } %call, 1 - %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv - store float %extract_a, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 
%iv - store float %extract_b, ptr %arrayidx4, align 4 - br label %if.merge - -if.merge: - %iv.next = add nuw nsw i64 %iv, 1 - %cond = icmp slt i64 %iv.next, 1024 - br i1 %cond, label %for.body, label %for.end - -for.end: - ret void -} diff --git a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll new file mode 100644 index 0000000000000..d928a4b7ebe4b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll @@ -0,0 +1,330 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|modf|extract|store)" --version 5 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s -S -o - | FileCheck %s + +define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincos_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: 
[[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.sincos.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincos_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 
+; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.sincos.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @predicated_sincos( +; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_BODY1:.*]]: +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_BODY1]] ], [ [[INDEX_NEXT:%.*]], %[[IF_THEN2:.*]] ] +; CHECK: [[TMP4:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 0 +; CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 1 +; CHECK: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3:%.*]], i32 0 +; CHECK: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK: [[TMP9:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 +; CHECK: store float [[TMP9]], ptr [[TMP8:%.*]], align 4 +; CHECK: [[TMP11:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 +; CHECK: store float [[TMP11]], ptr [[TMP10:%.*]], 
align 4 +; CHECK: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; CHECK: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[IF_THEN2]] +; CHECK: [[PRED_STORE_IF1]]: +; CHECK: [[TMP15:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 +; CHECK: store float [[TMP15]], ptr [[TMP14:%.*]], align 4 +; CHECK: [[TMP17:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 +; CHECK: store float [[TMP17]], ptr [[TMP16:%.*]], align 4 +; CHECK: br label %[[IF_THEN2]] +; CHECK: [[IF_THEN2]]: +; CHECK: [[IF_THEN:.*:]] +; CHECK: [[IF_THEN3:.*:]] +; CHECK: [[IF_THEN4:.*:]] +; CHECK: [[IF_THEN1:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[IF_MERGE:.*:]] +; CHECK: [[FOR_END:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %if.merge ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %if_cond = fcmp olt float %in_val, %x + br i1 %if_cond, label %if.then, label %if.merge + +if.then: + %call = tail call { float, float } @llvm.sincos.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + br label %if.merge + +if.merge: + %iv.next = add nuw nsw i64 %iv, 1 + %cond = icmp slt i64 %iv.next, 1024 + br i1 %cond, label %for.body, label %for.end + +for.end: + ret 
void +} + +define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @modf_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.modf.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.modf.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +define 
void @modf_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @modf_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.modf.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.modf.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void 
+} + +define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincospi_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincospi.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.sincospi.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void 
+} + +define void @sincospi_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincospi_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincospi.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.sincospi.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, 
label %for.body + +exit: + ret void +} + diff --git a/llvm/test/Transforms/LoopVectorize/sincos.ll b/llvm/test/Transforms/LoopVectorize/sincos.ll deleted file mode 100644 index c2936eb8bb8b2..0000000000000 --- a/llvm/test/Transforms/LoopVectorize/sincos.ll +++ /dev/null @@ -1,157 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|extract|store)" --version 5 -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s -S -o - | FileCheck %s - -define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @sincos_f32( -; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_PH:.*:]] -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) -; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 -; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 -; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 -; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK: [[EXIT:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv - %in_val = load float, ptr %arrayidx, align 4 - %call = tail call { float, float } 
@llvm.sincos.f32(float %in_val) - %extract_a = extractvalue { float, float } %call, 0 - %extract_b = extractvalue { float, float } %call, 1 - %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv - store float %extract_a, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv - store float %extract_b, ptr %arrayidx4, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 - br i1 %exitcond.not, label %exit, label %for.body - -exit: - ret void -} - -define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @sincos_f64( -; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_PH:.*:]] -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) -; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 -; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 -; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 -; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 -; CHECK: [[EXIT:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv - %in_val = load double, ptr %arrayidx, align 8 - %call = tail call { 
double, double } @llvm.sincos.f64(double %in_val) - %extract_a = extractvalue { double, double } %call, 0 - %extract_b = extractvalue { double, double } %call, 1 - %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv - store double %extract_a, ptr %arrayidx2, align 8 - %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv - store double %extract_b, ptr %arrayidx4, align 8 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 - br i1 %exitcond.not, label %exit, label %for.body - -exit: - ret void -} - -define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @predicated_sincos( -; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_BODY1:.*]]: -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_BODY1]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_BODY1:.*]] ] -; CHECK: [[TMP4:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) -; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 0 -; CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 1 -; CHECK: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3:%.*]], i32 0 -; CHECK: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: -; CHECK: [[TMP9:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; CHECK: store float [[TMP9]], ptr [[TMP8:%.*]], align 4 -; CHECK: [[TMP11:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 -; CHECK: store float [[TMP11]], ptr [[TMP10:%.*]], align 4 -; CHECK: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; CHECK: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[FOR_BODY1]] -; CHECK: 
[[PRED_STORE_IF1]]: -; CHECK: [[TMP15:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 -; CHECK: store float [[TMP15]], ptr [[TMP14:%.*]], align 4 -; CHECK: [[TMP17:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 -; CHECK: store float [[TMP17]], ptr [[TMP16:%.*]], align 4 -; CHECK: br label %[[FOR_BODY1]] -; CHECK: [[FOR_BODY1]]: -; CHECK: [[IF_THEN1:.*:]] -; CHECK: [[IF_THEN2:.*:]] -; CHECK: [[IF_THEN:.*:]] -; CHECK: [[IF_THEN3:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK: [[IF_MERGE:.*:]] -; CHECK: [[FOR_END:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ %iv.next, %if.merge ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv - %in_val = load float, ptr %arrayidx, align 4 - %if_cond = fcmp olt float %in_val, %x - br i1 %if_cond, label %if.then, label %if.merge - -if.then: - %call = tail call { float, float } @llvm.sincos.f32(float %in_val) - %extract_a = extractvalue { float, float } %call, 0 - %extract_b = extractvalue { float, float } %call, 1 - %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv - store float %extract_a, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv - store float %extract_b, ptr %arrayidx4, align 4 - br label %if.merge - -if.merge: - %iv.next = add nuw nsw i64 %iv, 1 - %cond = icmp slt i64 %iv.next, 1024 - br i1 %cond, label %for.body, label %for.end - -for.end: - ret void -} diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 11fa3337544a1..18acae5835724 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -6,663 +6,175 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1 -; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[TMP0]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[PIX1]], i64 1 -; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3 -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1 -; CHECK-NEXT: [[CONV33:%.*]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ADD_PTR3]], align 1 -; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4 -; CHECK-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1 -; CHECK-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3 -; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX27_1]], align 1 -; CHECK-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = 
getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP31:%.*]] = zext <2 x i8> [[TMP22]] to <2 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i32> [[TMP21]], [[TMP31]] -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 -; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[TMP49]] to <2 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP50:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = sub <2 x i32> [[TMP26]], [[TMP50]] -; CHECK-NEXT: [[TMP25:%.*]] = shl <2 x i32> [[TMP24]], splat (i32 16) -; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]] -; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32> -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP51]], [[TMP57]] -; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP40]] to <2 x i32> -; CHECK-NEXT: [[TMP36:%.*]] = sub <2 x i32> 
[[TMP39]], [[TMP61]] -; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], splat (i32 16) -; CHECK-NEXT: [[TMP42:%.*]] = add <2 x i32> [[TMP37]], [[TMP35]] -; CHECK-NEXT: [[TMP34:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]] -; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]] -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP34]], i32 0 -; CHECK-NEXT: [[CONV_2:%.*]] = extractelement <2 x i32> [[TMP34]], i32 1 -; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[CONV_2]], [[TMP43]] -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP44]], i32 0 -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP44]], i32 1 -; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP47]], [[TMP46]] ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4 -; CHECK-NEXT: [[TMP53:%.*]] = load <2 x i8>, ptr null, align 1 ; CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr null, align 1 -; CHECK-NEXT: [[TMP62:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> -; CHECK-NEXT: [[TMP77:%.*]] = zext i8 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = load <2 x i8>, ptr null, align 1 -; CHECK-NEXT: [[TMP55:%.*]] = zext <2 x i8> [[TMP54]] to <2 x i32> -; CHECK-NEXT: [[TMP59:%.*]] = sub <2 x i32> [[TMP62]], [[TMP55]] -; CHECK-NEXT: [[TMP41:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) -; CHECK-NEXT: [[TMP58:%.*]] = zext <2 x i8> [[TMP41]] to <2 x i32> -; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <2 x i32> [[TMP58]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP63:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[TMP76:%.*]] = zext <2 x i8> [[TMP63]] to <2 x i32> -; CHECK-NEXT: [[TMP81:%.*]] = sub <2 x i32> [[TMP48]], [[TMP76]] -; CHECK-NEXT: [[TMP167:%.*]] = shl <2 x i32> [[TMP81]], splat (i32 16) -; CHECK-NEXT: [[TMP75:%.*]] = add <2 x i32> [[TMP167]], [[TMP59]] -; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 -; CHECK-NEXT: [[ARRAYIDX22_3:%.*]] = 
getelementptr i8, ptr null, i64 2 -; CHECK-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6 -; CHECK-NEXT: [[TMP64:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_3]], align 1 -; CHECK-NEXT: [[TMP79:%.*]] = zext <2 x i8> [[TMP64]] to <2 x i32> -; CHECK-NEXT: [[TMP82:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 -; CHECK-NEXT: [[TMP91:%.*]] = zext <2 x i8> [[TMP82]] to <2 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = sub <2 x i32> [[TMP79]], [[TMP91]] -; CHECK-NEXT: [[TMP170:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) -; CHECK-NEXT: [[TMP171:%.*]] = zext <2 x i8> [[TMP170]] to <2 x i32> -; CHECK-NEXT: [[TMP172:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 -; CHECK-NEXT: [[TMP173:%.*]] = zext <2 x i8> [[TMP172]] to <2 x i32> -; CHECK-NEXT: [[TMP66:%.*]] = sub <2 x i32> [[TMP171]], [[TMP173]] -; CHECK-NEXT: [[TMP67:%.*]] = shl <2 x i32> [[TMP66]], splat (i32 16) -; CHECK-NEXT: [[TMP69:%.*]] = add <2 x i32> [[TMP67]], [[TMP65]] -; CHECK-NEXT: [[TMP176:%.*]] = extractelement <2 x i32> [[TMP75]], i32 0 -; CHECK-NEXT: [[TMP197:%.*]] = extractelement <2 x i32> [[TMP75]], i32 1 -; CHECK-NEXT: [[SUB59:%.*]] = add i32 [[TMP197]], [[TMP176]] -; CHECK-NEXT: [[SUB45_3:%.*]] = sub i32 [[TMP176]], [[TMP197]] -; CHECK-NEXT: [[ADD112_2:%.*]] = extractelement <2 x i32> [[TMP69]], i32 0 -; CHECK-NEXT: [[XOR_I63_2:%.*]] = extractelement <2 x i32> [[TMP69]], i32 1 -; CHECK-NEXT: [[SUB59_1:%.*]] = add i32 [[XOR_I63_2]], [[ADD112_2]] -; CHECK-NEXT: [[SUB47_3:%.*]] = sub i32 [[ADD112_2]], [[XOR_I63_2]] -; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[SUB59_1]], [[SUB59]] -; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <2 x i32> [[TMP34]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i32> [[TMP70]], i32 [[SUB59]], i32 0 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <2 x i32> [[TMP34]], i32 [[SUB59_1]], i32 0 -; CHECK-NEXT: [[TMP222:%.*]] = sub <2 x i32> [[TMP71]], [[TMP72]] 
-; CHECK-NEXT: [[ADD55_3:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] -; CHECK-NEXT: [[TMP74:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP78:%.*]] = insertelement <2 x i32> [[TMP74]], i32 [[SUB45_3]], i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = insertelement <2 x i32> [[TMP44]], i32 [[SUB47_3]], i32 0 -; CHECK-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP78]], [[TMP80]] -; CHECK-NEXT: [[ADD95:%.*]] = add i32 [[ADD94]], [[ADD48_2]] -; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[ADD48_2]], [[ADD94]] -; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP77]], 15 -; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537 -; CHECK-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535 -; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[CONV_2]], 15 -; CHECK-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537 -; CHECK-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535 -; CHECK-NEXT: [[TMP86:%.*]] = extractelement <2 x i32> [[TMP222]], i32 0 -; CHECK-NEXT: [[TMP87:%.*]] = extractelement <2 x i32> [[TMP222]], i32 1 -; CHECK-NEXT: [[ADD94_3:%.*]] = add i32 [[TMP86]], [[TMP87]] -; CHECK-NEXT: [[ADD112_1:%.*]] = sub i32 [[TMP87]], [[TMP86]] -; CHECK-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 -; CHECK-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 -; CHECK-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP85]], i32 0 -; CHECK-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1 -; CHECK-NEXT: [[ADD94_4:%.*]] = add i32 [[TMP88]], [[TMP89]] -; CHECK-NEXT: [[SUB102_3:%.*]] = sub i32 [[TMP89]], [[TMP88]] -; CHECK-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV1]], 15 -; CHECK-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 -; CHECK-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535 -; CHECK-NEXT: [[TMP90:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 -; CHECK-NEXT: [[TMP102:%.*]] = zext <2 x i8> [[TMP90]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr 
null, align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 ; CHECK-NEXT: [[TMP92:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 -; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <4 x i8> [[TMP92]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP94:%.*]] = zext <2 x i8> [[TMP93]] to <2 x i32> ; CHECK-NEXT: [[TMP95:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP97:%.*]] = zext <2 x i8> [[TMP96]] to <2 x i32> ; CHECK-NEXT: [[TMP98:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <4 x i8> [[TMP98]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP100:%.*]] = zext <2 x i8> [[TMP99]] to <2 x i32> -; CHECK-NEXT: [[TMP101:%.*]] = sub <2 x i32> [[TMP97]], [[TMP100]] -; CHECK-NEXT: [[TMP224:%.*]] = shl <2 x i32> [[TMP101]], splat (i32 16) -; CHECK-NEXT: [[TMP103:%.*]] = shufflevector <4 x i8> [[TMP92]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP104:%.*]] = zext <2 x i8> [[TMP103]] to <2 x i32> -; CHECK-NEXT: [[TMP105:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP106:%.*]] = zext <2 x i8> [[TMP105]] to <2 x i32> -; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <4 x i8> [[TMP98]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP108:%.*]] = zext <2 x i8> [[TMP107]] to <2 x i32> -; CHECK-NEXT: [[TMP109:%.*]] = sub <2 x i32> [[TMP106]], [[TMP108]] -; CHECK-NEXT: [[TMP110:%.*]] = shl <2 x i32> [[TMP109]], splat (i32 16) -; CHECK-NEXT: [[TMP111:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1 -; CHECK-NEXT: [[TMP112:%.*]] = sub <2 x i32> [[TMP111]], [[TMP104]] -; CHECK-NEXT: [[TMP113:%.*]] = add <2 x i32> [[TMP110]], [[TMP112]] -; CHECK-NEXT: [[TMP114:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV1]], i32 0 -; CHECK-NEXT: [[TMP115:%.*]] = sub <2 x i32> [[TMP114]], [[TMP94]] -; CHECK-NEXT: [[TMP116:%.*]] = add <2 x i32> [[TMP224]], [[TMP115]] -; 
CHECK-NEXT: [[TMP117:%.*]] = shufflevector <2 x i32> [[TMP113]], <2 x i32> [[TMP116]], <2 x i32> -; CHECK-NEXT: [[TMP126:%.*]] = add <2 x i32> [[TMP113]], [[TMP116]] -; CHECK-NEXT: [[TMP119:%.*]] = sub <2 x i32> [[TMP116]], [[TMP113]] -; CHECK-NEXT: [[TMP120:%.*]] = extractelement <2 x i32> [[TMP126]], i32 0 -; CHECK-NEXT: [[TMP127:%.*]] = extractelement <2 x i32> [[TMP126]], i32 1 -; CHECK-NEXT: [[ADD48:%.*]] = add i32 [[TMP127]], [[TMP120]] -; CHECK-NEXT: [[TMP166:%.*]] = sub i32 [[TMP120]], [[TMP127]] -; CHECK-NEXT: [[TMP128:%.*]] = extractelement <2 x i32> [[TMP119]], i32 0 -; CHECK-NEXT: [[TMP129:%.*]] = extractelement <2 x i32> [[TMP119]], i32 1 -; CHECK-NEXT: [[ADD55:%.*]] = add i32 [[TMP129]], [[TMP128]] -; CHECK-NEXT: [[SUB60:%.*]] = sub i32 [[TMP128]], [[TMP129]] -; CHECK-NEXT: [[SHR_I59:%.*]] = lshr i32 [[TMP127]], 15 -; CHECK-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 -; CHECK-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 -; CHECK-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP129]], 15 -; CHECK-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 -; CHECK-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 -; CHECK-NEXT: [[TMP130:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 -; CHECK-NEXT: [[TMP131:%.*]] = zext <2 x i8> [[TMP130]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 ; CHECK-NEXT: [[TMP132:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 -; CHECK-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP132]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> ; CHECK-NEXT: [[TMP135:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; CHECK-NEXT: [[TMP136:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP137:%.*]] = zext <2 x i8> [[TMP136]] to <2 x i32> ; CHECK-NEXT: [[TMP138:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[TMP139:%.*]] = shufflevector <4 x i8> [[TMP138]], <4 x i8> 
poison, <2 x i32> -; CHECK-NEXT: [[TMP140:%.*]] = zext <2 x i8> [[TMP139]] to <2 x i32> -; CHECK-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP137]], [[TMP140]] -; CHECK-NEXT: [[TMP142:%.*]] = shl <2 x i32> [[TMP141]], splat (i32 16) -; CHECK-NEXT: [[TMP143:%.*]] = shufflevector <4 x i8> [[TMP132]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP144:%.*]] = zext <2 x i8> [[TMP143]] to <2 x i32> -; CHECK-NEXT: [[TMP145:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP146:%.*]] = zext <2 x i8> [[TMP145]] to <2 x i32> -; CHECK-NEXT: [[TMP147:%.*]] = shufflevector <4 x i8> [[TMP138]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP148:%.*]] = zext <2 x i8> [[TMP147]] to <2 x i32> -; CHECK-NEXT: [[TMP149:%.*]] = sub <2 x i32> [[TMP146]], [[TMP148]] -; CHECK-NEXT: [[TMP150:%.*]] = shl <2 x i32> [[TMP149]], splat (i32 16) -; CHECK-NEXT: [[TMP151:%.*]] = insertelement <2 x i32> [[TMP131]], i32 [[CONV33_1]], i32 1 -; CHECK-NEXT: [[TMP225:%.*]] = sub <2 x i32> [[TMP151]], [[TMP144]] -; CHECK-NEXT: [[TMP153:%.*]] = add <2 x i32> [[TMP150]], [[TMP225]] -; CHECK-NEXT: [[TMP154:%.*]] = insertelement <2 x i32> [[TMP131]], i32 [[CONV_1]], i32 0 -; CHECK-NEXT: [[TMP155:%.*]] = sub <2 x i32> [[TMP154]], [[TMP134]] -; CHECK-NEXT: [[TMP156:%.*]] = add <2 x i32> [[TMP142]], [[TMP155]] -; CHECK-NEXT: [[TMP157:%.*]] = add <2 x i32> [[TMP153]], [[TMP156]] -; CHECK-NEXT: [[TMP158:%.*]] = sub <2 x i32> [[TMP156]], [[TMP153]] -; CHECK-NEXT: [[TMP159:%.*]] = extractelement <2 x i32> [[TMP157]], i32 0 -; CHECK-NEXT: [[TMP160:%.*]] = extractelement <2 x i32> [[TMP157]], i32 1 -; CHECK-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP160]], [[TMP159]] -; CHECK-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP159]], [[TMP160]] -; CHECK-NEXT: [[TMP161:%.*]] = extractelement <2 x i32> [[TMP158]], i32 0 -; CHECK-NEXT: [[TMP162:%.*]] = extractelement <2 x i32> [[TMP158]], i32 1 -; CHECK-NEXT: [[ADD55_1:%.*]] = add i32 [[TMP162]], [[TMP161]] -; CHECK-NEXT: [[SUB59_2:%.*]] = sub i32 
[[TMP161]], [[TMP162]] -; CHECK-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP160]], 15 -; CHECK-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 -; CHECK-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 -; CHECK-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP162]], 15 -; CHECK-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 -; CHECK-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; CHECK-NEXT: [[TMP163:%.*]] = lshr <2 x i32> [[TMP131]], splat (i32 15) -; CHECK-NEXT: [[TMP164:%.*]] = and <2 x i32> [[TMP163]], splat (i32 65537) -; CHECK-NEXT: [[TMP165:%.*]] = mul <2 x i32> [[TMP164]], splat (i32 65535) -; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD48]] -; CHECK-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_1]] -; CHECK-NEXT: [[ADD103:%.*]] = add i32 [[ADD95]], [[ADD78]] -; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD95]] -; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB86_3]], [[SUB86]] -; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB86_3]] -; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I_1]], [[ADD103]] -; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP77]] -; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51_1]], [[ADD105]] -; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[CONV_2]] -; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] -; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP160]] -; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] -; CHECK-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP127]] -; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] -; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] -; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[ADD112]], [[XOR_I63]] -; CHECK-NEXT: [[TMP169:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[TMP181:%.*]] = zext <2 x i8> [[TMP169]] to <2 x i32> -; CHECK-NEXT: [[TMP152:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_2]], i32 0 -; CHECK-NEXT: [[TMP182:%.*]] = shufflevector <2 x i32> [[TMP152]], <2 x 
i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP183:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_3]], i32 0 -; CHECK-NEXT: [[TMP184:%.*]] = shufflevector <2 x i32> [[TMP183]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP191:%.*]] = sub <2 x i32> [[TMP182]], [[TMP184]] -; CHECK-NEXT: [[TMP192:%.*]] = add <2 x i32> [[TMP182]], [[TMP184]] -; CHECK-NEXT: [[TMP194:%.*]] = shufflevector <2 x i32> [[TMP191]], <2 x i32> [[TMP192]], <2 x i32> -; CHECK-NEXT: [[TMP195:%.*]] = lshr <2 x i32> [[TMP181]], splat (i32 15) -; CHECK-NEXT: [[TMP196:%.*]] = and <2 x i32> [[TMP195]], splat (i32 65537) -; CHECK-NEXT: [[TMP198:%.*]] = mul <2 x i32> [[TMP196]], splat (i32 65535) -; CHECK-NEXT: [[TMP202:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55]], i32 0 -; CHECK-NEXT: [[TMP203:%.*]] = shufflevector <2 x i32> [[TMP202]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP205:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_1]], i32 0 -; CHECK-NEXT: [[TMP206:%.*]] = shufflevector <2 x i32> [[TMP205]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP207:%.*]] = sub <2 x i32> [[TMP203]], [[TMP206]] -; CHECK-NEXT: [[TMP210:%.*]] = add <2 x i32> [[TMP203]], [[TMP206]] -; CHECK-NEXT: [[TMP168:%.*]] = shufflevector <2 x i32> [[TMP207]], <2 x i32> [[TMP210]], <2 x i32> -; CHECK-NEXT: [[ADD94_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 1 -; CHECK-NEXT: [[ADD78_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 1 -; CHECK-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]] -; CHECK-NEXT: [[TMP220:%.*]] = add <2 x i32> [[TMP194]], [[TMP168]] -; CHECK-NEXT: [[SUB102_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 0 -; CHECK-NEXT: [[SUB86_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 0 -; CHECK-NEXT: [[TMP174:%.*]] = shufflevector <2 x i32> [[TMP168]], <2 x i32> [[TMP194]], <2 x i32> -; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] -; CHECK-NEXT: [[TMP175:%.*]] = add <2 x i32> 
[[TMP198]], [[TMP220]] -; CHECK-NEXT: [[TMP221:%.*]] = xor <2 x i32> [[TMP175]], [[TMP181]] -; CHECK-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] -; CHECK-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP162]] -; CHECK-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] -; CHECK-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP129]] -; CHECK-NEXT: [[XOR_I53_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 0 -; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD105_3]] -; CHECK-NEXT: [[XOR_I_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 1 -; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] -; CHECK-NEXT: [[ADD112_5:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] -; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_5]], [[XOR_I63_1]] -; CHECK-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB51_1]], [[TMP166]] -; CHECK-NEXT: [[TMP204:%.*]] = sub i32 [[TMP166]], [[SUB51_1]] -; CHECK-NEXT: [[TMP177:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0 -; CHECK-NEXT: [[TMP178:%.*]] = shufflevector <2 x i32> [[TMP177]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP179:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0 -; CHECK-NEXT: [[TMP180:%.*]] = shufflevector <2 x i32> [[TMP179]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP199:%.*]] = add <2 x i32> [[TMP178]], [[TMP180]] -; CHECK-NEXT: [[TMP200:%.*]] = sub <2 x i32> [[TMP178]], [[TMP180]] -; CHECK-NEXT: [[TMP201:%.*]] = shufflevector <2 x i32> [[TMP199]], <2 x i32> [[TMP200]], <2 x i32> -; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[TMP204]] -; CHECK-NEXT: [[SUB106_2:%.*]] = sub i32 [[TMP204]], [[ADD112_1]] -; CHECK-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD113_1]] -; CHECK-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] -; CHECK-NEXT: [[TMP208:%.*]] = add <2 x i32> [[TMP165]], [[TMP201]] -; CHECK-NEXT: [[TMP209:%.*]] = xor <2 x i32> [[TMP208]], [[TMP131]] -; CHECK-NEXT: 
[[SHR_I59_2:%.*]] = lshr i32 [[TMP120]], 15 -; CHECK-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 -; CHECK-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 -; CHECK-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] -; CHECK-NEXT: [[XOR_I63_4:%.*]] = xor i32 [[ADD_I62_2]], [[TMP120]] -; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_2]] -; CHECK-NEXT: [[TMP211:%.*]] = extractelement <2 x i32> [[TMP209]], i32 0 -; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP211]] -; CHECK-NEXT: [[TMP212:%.*]] = extractelement <2 x i32> [[TMP209]], i32 1 -; CHECK-NEXT: [[ADD112_4:%.*]] = add i32 [[ADD110_2]], [[TMP212]] -; CHECK-NEXT: [[ADD113_4:%.*]] = add i32 [[ADD112_4]], [[XOR_I63_4]] -; CHECK-NEXT: [[ADD78_4:%.*]] = add i32 [[SUB59_2]], [[SUB60]] -; CHECK-NEXT: [[SUB86_4:%.*]] = sub i32 [[SUB60]], [[SUB59_2]] -; CHECK-NEXT: [[TMP213:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_4]], i32 0 -; CHECK-NEXT: [[TMP214:%.*]] = shufflevector <2 x i32> [[TMP213]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP215:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_4]], i32 0 -; CHECK-NEXT: [[TMP216:%.*]] = shufflevector <2 x i32> [[TMP215]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP217:%.*]] = add <2 x i32> [[TMP214]], [[TMP216]] -; CHECK-NEXT: [[TMP218:%.*]] = sub <2 x i32> [[TMP214]], [[TMP216]] -; CHECK-NEXT: [[TMP219:%.*]] = shufflevector <2 x i32> [[TMP217]], <2 x i32> [[TMP218]], <2 x i32> -; CHECK-NEXT: [[ADD105_4:%.*]] = add i32 [[SUB102_3]], [[SUB86_4]] -; CHECK-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_4]], [[SUB102_3]] -; CHECK-NEXT: [[ADD_I52_4:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_4]] -; CHECK-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_4]], [[CONV1]] -; CHECK-NEXT: [[TMP185:%.*]] = lshr <2 x i32> [[TMP102]], splat (i32 15) -; CHECK-NEXT: [[TMP193:%.*]] = and <2 x i32> [[TMP185]], splat (i32 65537) -; CHECK-NEXT: [[TMP186:%.*]] = mul <2 x i32> [[TMP193]], splat (i32 
65535) -; CHECK-NEXT: [[TMP187:%.*]] = add <2 x i32> [[TMP186]], [[TMP219]] -; CHECK-NEXT: [[TMP188:%.*]] = xor <2 x i32> [[TMP187]], [[TMP102]] -; CHECK-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 -; CHECK-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 -; CHECK-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 -; CHECK-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] -; CHECK-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] -; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_4]] -; CHECK-NEXT: [[TMP189:%.*]] = extractelement <2 x i32> [[TMP188]], i32 0 -; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP189]] -; CHECK-NEXT: [[TMP190:%.*]] = extractelement <2 x i32> [[TMP188]], i32 1 -; CHECK-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP190]] -; CHECK-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]] +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr null, align 1 +; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP10]], i64 0) +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP14]], i64 4) +; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP16]], <4 x i8> [[TMP2]], i64 8) +; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP17]], <4 x i8> [[TMP6]], i64 12) +; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i8>, ptr null, align 1 +; CHECK-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> 
[[TMP11]], i64 0) +; CHECK-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP20]], i64 4) +; CHECK-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP92]], i64 8) +; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP23]], <4 x i8> [[TMP132]], i64 12) +; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = sub <16 x i32> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <2 x i8> [[TMP28]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP29]], <16 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP34]], i8 [[TMP3]], i32 5 +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <16 x i8> [[TMP35]], i8 [[TMP52]], i32 9 +; CHECK-NEXT: [[TMP37:%.*]] = zext <16 x i8> [[TMP36]] to <16 x i32> +; CHECK-NEXT: [[TMP38:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP13]], i64 0) +; CHECK-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP38]], i64 4) +; CHECK-NEXT: [[TMP41:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> 
[[TMP40]], <4 x i8> [[TMP98]], i64 8) +; CHECK-NEXT: [[TMP42:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP41]], <4 x i8> [[TMP138]], i64 12) +; CHECK-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = sub <16 x i32> [[TMP37]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = shl <16 x i32> [[TMP45]], splat (i32 16) +; CHECK-NEXT: [[TMP47:%.*]] = add <16 x i32> [[TMP46]], [[TMP27]] +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = add <16 x i32> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = sub <16 x i32> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = add <16 x i32> [[TMP51]], [[TMP70]] +; CHECK-NEXT: [[TMP54:%.*]] = sub <16 x i32> [[TMP51]], [[TMP70]] +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = sub <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = add <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = add <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = sub <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP19]], <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = lshr 
<16 x i32> [[TMP64]], splat (i32 15) +; CHECK-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537) +; CHECK-NEXT: [[TMP67:%.*]] = mul <16 x i32> [[TMP66]], splat (i32 65535) +; CHECK-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP63]] +; CHECK-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP64]] +; CHECK-NEXT: [[ADD113_3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) ; CHECK-NEXT: ret i32 [[ADD113_3]] ; ; THR15-LABEL: define i32 @test( ; THR15-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] { ; THR15-NEXT: entry: -; THR15-NEXT: [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1 -; THR15-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32 ; THR15-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4 ; THR15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4 -; THR15-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[PIX1]], i64 1 -; THR15-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3 -; THR15-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1 -; THR15-NEXT: [[CONV33:%.*]] = zext i8 [[TMP1]] to i32 ; THR15-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]] ; THR15-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] -; THR15-NEXT: [[TMP2:%.*]] = load i8, ptr [[ADD_PTR3]], align 1 -; THR15-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP2]] to i32 ; THR15-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4 ; THR15-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4 -; THR15-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1 -; THR15-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3 -; THR15-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX27_1]], align 1 -; THR15-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP5]] to i32 ; THR15-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 
[[IDX_EXT]] ; THR15-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] ; THR15-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4 ; THR15-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 -; THR15-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 1 -; THR15-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 -; THR15-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 -; THR15-NEXT: [[TMP6:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 -; THR15-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP20:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32> -; THR15-NEXT: [[TMP87:%.*]] = zext i8 [[TMP6]] to i32 -; THR15-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 -; THR15-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP22:%.*]] = zext <2 x i8> [[TMP21]] to <2 x i32> -; THR15-NEXT: [[TMP23:%.*]] = sub <2 x i32> [[TMP20]], [[TMP22]] -; THR15-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 -; THR15-NEXT: [[TMP24:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP25:%.*]] = zext <2 x i8> [[TMP24]] to <2 x i32> -; THR15-NEXT: [[TMP16:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 -; THR15-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP27:%.*]] = zext <2 x i8> [[TMP26]] to <2 x i32> -; THR15-NEXT: [[TMP28:%.*]] = sub <2 x i32> [[TMP25]], [[TMP27]] -; THR15-NEXT: [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], splat (i32 16) -; THR15-NEXT: [[TMP59:%.*]] = add <2 x i32> [[TMP29]], [[TMP23]] -; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP32:%.*]] = zext <2 x i8> [[TMP31]] to <2 x i32> -; THR15-NEXT: [[TMP86:%.*]] = zext i8 [[TMP7]] to i32 -; THR15-NEXT: [[TMP33:%.*]] = shufflevector <4 x 
i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP34:%.*]] = zext <2 x i8> [[TMP33]] to <2 x i32> -; THR15-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP32]], [[TMP34]] -; THR15-NEXT: [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP37:%.*]] = zext <2 x i8> [[TMP36]] to <2 x i32> -; THR15-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32> -; THR15-NEXT: [[TMP40:%.*]] = sub <2 x i32> [[TMP37]], [[TMP39]] -; THR15-NEXT: [[TMP41:%.*]] = shl <2 x i32> [[TMP40]], splat (i32 16) -; THR15-NEXT: [[TMP76:%.*]] = add <2 x i32> [[TMP41]], [[TMP35]] -; THR15-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP76]], [[TMP59]] -; THR15-NEXT: [[TMP42:%.*]] = sub <2 x i32> [[TMP59]], [[TMP76]] -; THR15-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP30]], i32 0 -; THR15-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP30]], i32 1 -; THR15-NEXT: [[ADD44_2:%.*]] = add i32 [[TMP44]], [[TMP43]] -; THR15-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP42]], i32 0 -; THR15-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP42]], i32 1 -; THR15-NEXT: [[ADD46_2:%.*]] = add i32 [[TMP46]], [[TMP45]] ; THR15-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4 -; THR15-NEXT: [[TMP47:%.*]] = load <2 x i8>, ptr null, align 1 ; THR15-NEXT: [[TMP48:%.*]] = load i8, ptr null, align 1 -; THR15-NEXT: [[TMP49:%.*]] = zext <2 x i8> [[TMP47]] to <2 x i32> -; THR15-NEXT: [[CONV_3:%.*]] = zext i8 [[TMP48]] to i32 -; THR15-NEXT: [[TMP50:%.*]] = load <2 x i8>, ptr null, align 1 -; THR15-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP50]] to <2 x i32> -; THR15-NEXT: [[TMP52:%.*]] = sub <2 x i32> [[TMP49]], [[TMP51]] -; THR15-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) -; THR15-NEXT: [[TMP54:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> -; THR15-NEXT: 
[[TMP77:%.*]] = shufflevector <2 x i32> [[TMP54]], <2 x i32> poison, <2 x i32> -; THR15-NEXT: [[TMP55:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; THR15-NEXT: [[TMP56:%.*]] = zext <2 x i8> [[TMP55]] to <2 x i32> -; THR15-NEXT: [[TMP57:%.*]] = sub <2 x i32> [[TMP77]], [[TMP56]] -; THR15-NEXT: [[TMP58:%.*]] = shl <2 x i32> [[TMP57]], splat (i32 16) -; THR15-NEXT: [[TMP72:%.*]] = add <2 x i32> [[TMP58]], [[TMP52]] -; THR15-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 -; THR15-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 -; THR15-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6 -; THR15-NEXT: [[TMP60:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_3]], align 1 -; THR15-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP60]] to <2 x i32> -; THR15-NEXT: [[TMP62:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 -; THR15-NEXT: [[TMP63:%.*]] = zext <2 x i8> [[TMP62]] to <2 x i32> -; THR15-NEXT: [[TMP64:%.*]] = sub <2 x i32> [[TMP61]], [[TMP63]] -; THR15-NEXT: [[TMP65:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) -; THR15-NEXT: [[TMP66:%.*]] = zext <2 x i8> [[TMP65]] to <2 x i32> -; THR15-NEXT: [[TMP67:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 -; THR15-NEXT: [[TMP68:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32> -; THR15-NEXT: [[TMP69:%.*]] = sub <2 x i32> [[TMP66]], [[TMP68]] -; THR15-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16) -; THR15-NEXT: [[TMP73:%.*]] = add <2 x i32> [[TMP70]], [[TMP64]] -; THR15-NEXT: [[TMP74:%.*]] = extractelement <2 x i32> [[TMP72]], i32 0 -; THR15-NEXT: [[TMP75:%.*]] = extractelement <2 x i32> [[TMP72]], i32 1 -; THR15-NEXT: [[ADD48_3:%.*]] = add i32 [[TMP75]], [[TMP74]] -; THR15-NEXT: [[SUB45_3:%.*]] = sub i32 [[TMP74]], [[TMP75]] -; THR15-NEXT: [[TMP80:%.*]] = extractelement <2 x i32> [[TMP73]], i32 0 -; THR15-NEXT: [[TMP81:%.*]] = extractelement <2 x i32> [[TMP73]], i32 1 -; THR15-NEXT: 
[[ADD55_3:%.*]] = add i32 [[TMP81]], [[TMP80]] -; THR15-NEXT: [[SUB47_3:%.*]] = sub i32 [[TMP80]], [[TMP81]] -; THR15-NEXT: [[ADD48_4:%.*]] = add i32 [[ADD55_3]], [[ADD48_3]] -; THR15-NEXT: [[TMP78:%.*]] = shufflevector <2 x i32> [[TMP30]], <2 x i32> poison, <2 x i32> -; THR15-NEXT: [[TMP71:%.*]] = insertelement <2 x i32> [[TMP78]], i32 [[ADD48_3]], i32 0 -; THR15-NEXT: [[TMP83:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[ADD55_3]], i32 0 -; THR15-NEXT: [[TMP79:%.*]] = sub <2 x i32> [[TMP71]], [[TMP83]] -; THR15-NEXT: [[ADD55_4:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] -; THR15-NEXT: [[TMP137:%.*]] = shufflevector <2 x i32> [[TMP42]], <2 x i32> poison, <2 x i32> -; THR15-NEXT: [[TMP82:%.*]] = insertelement <2 x i32> [[TMP137]], i32 [[SUB45_3]], i32 0 -; THR15-NEXT: [[TMP84:%.*]] = insertelement <2 x i32> [[TMP42]], i32 [[SUB47_3]], i32 0 -; THR15-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP82]], [[TMP84]] -; THR15-NEXT: [[ADD94:%.*]] = add i32 [[ADD48_4]], [[ADD44_2]] -; THR15-NEXT: [[SUB102:%.*]] = sub i32 [[ADD44_2]], [[ADD48_4]] -; THR15-NEXT: [[SHR_I:%.*]] = lshr i32 [[CONV_3]], 15 -; THR15-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537 -; THR15-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535 -; THR15-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP44]], 15 -; THR15-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537 -; THR15-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535 -; THR15-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_4]], [[ADD46_2]] -; THR15-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD46_2]], [[ADD55_4]] -; THR15-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP86]], 15 -; THR15-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537 -; THR15-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535 -; THR15-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[TMP87]], 15 -; THR15-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537 -; THR15-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535 -; THR15-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP79]], i32 0 -; THR15-NEXT: [[TMP89:%.*]] = 
extractelement <2 x i32> [[TMP79]], i32 1 -; THR15-NEXT: [[ADD94_2:%.*]] = add i32 [[TMP88]], [[TMP89]] -; THR15-NEXT: [[SUB102_2:%.*]] = sub i32 [[TMP89]], [[TMP88]] -; THR15-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 -; THR15-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 -; THR15-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 -; THR15-NEXT: [[TMP90:%.*]] = extractelement <2 x i32> [[TMP85]], i32 0 -; THR15-NEXT: [[TMP91:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1 -; THR15-NEXT: [[ADD94_3:%.*]] = add i32 [[TMP90]], [[TMP91]] -; THR15-NEXT: [[SUB102_3:%.*]] = sub i32 [[TMP91]], [[TMP90]] -; THR15-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV]], 15 -; THR15-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 -; THR15-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535 -; THR15-NEXT: [[TMP92:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 -; THR15-NEXT: [[TMP93:%.*]] = zext <2 x i8> [[TMP92]] to <2 x i32> +; THR15-NEXT: [[TMP1:%.*]] = load i8, ptr null, align 1 +; THR15-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 ; THR15-NEXT: [[TMP143:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 -; THR15-NEXT: [[TMP94:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP95:%.*]] = zext <2 x i8> [[TMP94]] to <2 x i32> ; THR15-NEXT: [[TMP146:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 -; THR15-NEXT: [[TMP96:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP97:%.*]] = zext <2 x i8> [[TMP96]] to <2 x i32> ; THR15-NEXT: [[TMP147:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP99:%.*]] = zext <2 x i8> [[TMP98]] to <2 x i32> -; THR15-NEXT: [[TMP100:%.*]] = sub <2 x i32> [[TMP97]], [[TMP99]] -; THR15-NEXT: [[TMP101:%.*]] = shl <2 x i32> [[TMP100]], splat (i32 16) -; THR15-NEXT: [[TMP102:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 
x i32> -; THR15-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP102]] to <2 x i32> -; THR15-NEXT: [[TMP104:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP105:%.*]] = zext <2 x i8> [[TMP104]] to <2 x i32> -; THR15-NEXT: [[TMP106:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP107:%.*]] = zext <2 x i8> [[TMP106]] to <2 x i32> -; THR15-NEXT: [[TMP108:%.*]] = sub <2 x i32> [[TMP105]], [[TMP107]] -; THR15-NEXT: [[TMP109:%.*]] = shl <2 x i32> [[TMP108]], splat (i32 16) -; THR15-NEXT: [[TMP110:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV33]], i32 1 -; THR15-NEXT: [[TMP111:%.*]] = sub <2 x i32> [[TMP110]], [[TMP103]] -; THR15-NEXT: [[TMP112:%.*]] = add <2 x i32> [[TMP109]], [[TMP111]] -; THR15-NEXT: [[TMP113:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV]], i32 0 -; THR15-NEXT: [[TMP114:%.*]] = sub <2 x i32> [[TMP113]], [[TMP95]] -; THR15-NEXT: [[TMP115:%.*]] = add <2 x i32> [[TMP101]], [[TMP114]] -; THR15-NEXT: [[TMP116:%.*]] = shufflevector <2 x i32> [[TMP112]], <2 x i32> [[TMP115]], <2 x i32> -; THR15-NEXT: [[TMP117:%.*]] = add <2 x i32> [[TMP112]], [[TMP115]] -; THR15-NEXT: [[TMP118:%.*]] = sub <2 x i32> [[TMP115]], [[TMP112]] -; THR15-NEXT: [[TMP119:%.*]] = extractelement <2 x i32> [[TMP117]], i32 0 -; THR15-NEXT: [[TMP120:%.*]] = extractelement <2 x i32> [[TMP117]], i32 1 -; THR15-NEXT: [[ADD48:%.*]] = add i32 [[TMP120]], [[TMP119]] -; THR15-NEXT: [[SUB51:%.*]] = sub i32 [[TMP119]], [[TMP120]] -; THR15-NEXT: [[TMP121:%.*]] = extractelement <2 x i32> [[TMP118]], i32 0 -; THR15-NEXT: [[TMP122:%.*]] = extractelement <2 x i32> [[TMP118]], i32 1 -; THR15-NEXT: [[ADD55:%.*]] = add i32 [[TMP122]], [[TMP121]] -; THR15-NEXT: [[SUB59:%.*]] = sub i32 [[TMP121]], [[TMP122]] -; THR15-NEXT: [[SHR_I59:%.*]] = lshr i32 [[TMP120]], 15 -; THR15-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 -; THR15-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 -; THR15-NEXT: [[SHR_I59_1:%.*]] = 
lshr i32 [[TMP122]], 15 -; THR15-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 -; THR15-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 -; THR15-NEXT: [[TMP123:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 -; THR15-NEXT: [[TMP124:%.*]] = zext <2 x i8> [[TMP123]] to <2 x i32> +; THR15-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 ; THR15-NEXT: [[TMP148:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 -; THR15-NEXT: [[TMP125:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP126:%.*]] = zext <2 x i8> [[TMP125]] to <2 x i32> ; THR15-NEXT: [[TMP152:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; THR15-NEXT: [[TMP127:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP128:%.*]] = zext <2 x i8> [[TMP127]] to <2 x i32> ; THR15-NEXT: [[TMP153:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; THR15-NEXT: [[TMP129:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP130:%.*]] = zext <2 x i8> [[TMP129]] to <2 x i32> -; THR15-NEXT: [[TMP131:%.*]] = sub <2 x i32> [[TMP128]], [[TMP130]] -; THR15-NEXT: [[TMP132:%.*]] = shl <2 x i32> [[TMP131]], splat (i32 16) -; THR15-NEXT: [[TMP138:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP139:%.*]] = zext <2 x i8> [[TMP138]] to <2 x i32> -; THR15-NEXT: [[TMP154:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP155:%.*]] = zext <2 x i8> [[TMP154]] to <2 x i32> -; THR15-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> -; THR15-NEXT: [[TMP135:%.*]] = sub <2 x i32> [[TMP155]], [[TMP134]] -; THR15-NEXT: [[TMP170:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16) -; THR15-NEXT: [[TMP140:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV33_1]], i32 1 -; THR15-NEXT: [[TMP141:%.*]] = sub <2 x i32> 
[[TMP140]], [[TMP139]] -; THR15-NEXT: [[TMP171:%.*]] = add <2 x i32> [[TMP170]], [[TMP141]] -; THR15-NEXT: [[TMP186:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV_1]], i32 0 -; THR15-NEXT: [[TMP187:%.*]] = sub <2 x i32> [[TMP186]], [[TMP126]] -; THR15-NEXT: [[TMP142:%.*]] = add <2 x i32> [[TMP132]], [[TMP187]] -; THR15-NEXT: [[TMP136:%.*]] = add <2 x i32> [[TMP171]], [[TMP142]] -; THR15-NEXT: [[TMP149:%.*]] = sub <2 x i32> [[TMP142]], [[TMP171]] -; THR15-NEXT: [[TMP144:%.*]] = extractelement <2 x i32> [[TMP136]], i32 0 -; THR15-NEXT: [[TMP145:%.*]] = extractelement <2 x i32> [[TMP136]], i32 1 -; THR15-NEXT: [[ADD48_2:%.*]] = add i32 [[TMP145]], [[TMP144]] -; THR15-NEXT: [[SUB45_1:%.*]] = sub i32 [[TMP144]], [[TMP145]] -; THR15-NEXT: [[TMP150:%.*]] = extractelement <2 x i32> [[TMP149]], i32 0 -; THR15-NEXT: [[TMP151:%.*]] = extractelement <2 x i32> [[TMP149]], i32 1 -; THR15-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP151]], [[TMP150]] -; THR15-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP150]], [[TMP151]] -; THR15-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP145]], 15 -; THR15-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 -; THR15-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 -; THR15-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP151]], 15 -; THR15-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 -; THR15-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; THR15-NEXT: [[TMP156:%.*]] = lshr <2 x i32> [[TMP124]], splat (i32 15) -; THR15-NEXT: [[TMP157:%.*]] = and <2 x i32> [[TMP156]], splat (i32 65537) -; THR15-NEXT: [[TMP158:%.*]] = mul <2 x i32> [[TMP157]], splat (i32 65535) -; THR15-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_2]], [[ADD48]] -; THR15-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_2]] -; THR15-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] -; THR15-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]] -; THR15-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB86]] -; THR15-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB102]] -; THR15-NEXT: 
[[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]] -; THR15-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[CONV_3]] -; THR15-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]] -; THR15-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP44]] -; THR15-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] -; THR15-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP145]] -; THR15-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] -; THR15-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP120]] -; THR15-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] -; THR15-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] -; THR15-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I63]] -; THR15-NEXT: [[ADD78_1:%.*]] = add i32 [[ADD48_1]], [[ADD55]] -; THR15-NEXT: [[SUB86_1:%.*]] = sub i32 [[ADD55]], [[ADD48_1]] -; THR15-NEXT: [[ADD103_1:%.*]] = add i32 [[ADD94_1]], [[ADD78_1]] -; THR15-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]] -; THR15-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]] -; THR15-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] -; THR15-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]] -; THR15-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[TMP86]] -; THR15-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]] -; THR15-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[TMP87]] -; THR15-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] -; THR15-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP151]] -; THR15-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] -; THR15-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP122]] -; THR15-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD113]] -; THR15-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] -; THR15-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] -; THR15-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I63_1]] -; THR15-NEXT: [[ADD78_2:%.*]] = add i32 [[SUB45_1]], [[SUB51]] -; THR15-NEXT: 
[[SUB86_2:%.*]] = sub i32 [[SUB51]], [[SUB45_1]] -; THR15-NEXT: [[TMP159:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0 -; THR15-NEXT: [[TMP160:%.*]] = shufflevector <2 x i32> [[TMP159]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP161:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_2]], i32 0 -; THR15-NEXT: [[TMP162:%.*]] = shufflevector <2 x i32> [[TMP161]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP163:%.*]] = add <2 x i32> [[TMP160]], [[TMP162]] -; THR15-NEXT: [[TMP164:%.*]] = sub <2 x i32> [[TMP160]], [[TMP162]] -; THR15-NEXT: [[TMP165:%.*]] = shufflevector <2 x i32> [[TMP163]], <2 x i32> [[TMP164]], <2 x i32> -; THR15-NEXT: [[ADD105_2:%.*]] = add i32 [[SUB102_2]], [[SUB86_2]] -; THR15-NEXT: [[SUB106_2:%.*]] = sub i32 [[SUB86_2]], [[SUB102_2]] -; THR15-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD105_2]] -; THR15-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] -; THR15-NEXT: [[TMP166:%.*]] = add <2 x i32> [[TMP158]], [[TMP165]] -; THR15-NEXT: [[TMP167:%.*]] = xor <2 x i32> [[TMP166]], [[TMP124]] -; THR15-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP119]], 15 -; THR15-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 -; THR15-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 -; THR15-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] -; THR15-NEXT: [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[TMP119]] -; THR15-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_1]] -; THR15-NEXT: [[TMP168:%.*]] = extractelement <2 x i32> [[TMP167]], i32 0 -; THR15-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP168]] -; THR15-NEXT: [[TMP169:%.*]] = extractelement <2 x i32> [[TMP167]], i32 1 -; THR15-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP169]] -; THR15-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]] -; THR15-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB51_1]], [[SUB59]] -; THR15-NEXT: [[SUB86_3:%.*]] = sub i32 [[SUB59]], [[SUB51_1]] -; THR15-NEXT: 
[[TMP172:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0 -; THR15-NEXT: [[TMP173:%.*]] = shufflevector <2 x i32> [[TMP172]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP174:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0 -; THR15-NEXT: [[TMP175:%.*]] = shufflevector <2 x i32> [[TMP174]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP176:%.*]] = add <2 x i32> [[TMP173]], [[TMP175]] -; THR15-NEXT: [[TMP177:%.*]] = sub <2 x i32> [[TMP173]], [[TMP175]] -; THR15-NEXT: [[TMP178:%.*]] = shufflevector <2 x i32> [[TMP176]], <2 x i32> [[TMP177]], <2 x i32> -; THR15-NEXT: [[ADD105_3:%.*]] = add i32 [[SUB102_3]], [[SUB86_3]] -; THR15-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] -; THR15-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_3]] -; THR15-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]] -; THR15-NEXT: [[TMP179:%.*]] = lshr <2 x i32> [[TMP93]], splat (i32 15) -; THR15-NEXT: [[TMP180:%.*]] = and <2 x i32> [[TMP179]], splat (i32 65537) -; THR15-NEXT: [[TMP181:%.*]] = mul <2 x i32> [[TMP180]], splat (i32 65535) -; THR15-NEXT: [[TMP182:%.*]] = add <2 x i32> [[TMP181]], [[TMP178]] -; THR15-NEXT: [[TMP183:%.*]] = xor <2 x i32> [[TMP182]], [[TMP93]] -; THR15-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 -; THR15-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 -; THR15-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 -; THR15-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] -; THR15-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] -; THR15-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_2]] -; THR15-NEXT: [[TMP184:%.*]] = extractelement <2 x i32> [[TMP183]], i32 0 -; THR15-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP184]] -; THR15-NEXT: [[TMP185:%.*]] = extractelement <2 x i32> [[TMP183]], i32 1 -; THR15-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP185]] -; THR15-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], 
[[XOR_I63_3]] +; THR15-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; THR15-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; THR15-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; THR15-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; THR15-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr null, align 1 +; THR15-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP10]], i64 0) +; THR15-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP14]], i64 4) +; THR15-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP16]], <4 x i8> [[TMP2]], i64 8) +; THR15-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP17]], <4 x i8> [[TMP6]], i64 12) +; THR15-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32> +; THR15-NEXT: [[TMP20:%.*]] = load <4 x i8>, ptr null, align 1 +; THR15-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP11]], i64 0) +; THR15-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP20]], i64 4) +; THR15-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP143]], i64 8) +; THR15-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP23]], <4 x i8> [[TMP148]], i64 12) +; THR15-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> +; THR15-NEXT: [[TMP26:%.*]] = sub <16 x i32> [[TMP19]], [[TMP25]] +; THR15-NEXT: [[TMP27:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) +; THR15-NEXT: [[TMP29:%.*]] = shufflevector <2 x i8> [[TMP28]], <2 x i8> poison, <4 x i32> +; THR15-NEXT: 
[[TMP30:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP29]], <16 x i32> +; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <16 x i32> +; THR15-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> +; THR15-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <16 x i32> +; THR15-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> +; THR15-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP34]], i8 [[TMP1]], i32 5 +; THR15-NEXT: [[TMP36:%.*]] = insertelement <16 x i8> [[TMP35]], i8 [[TMP48]], i32 9 +; THR15-NEXT: [[TMP37:%.*]] = zext <16 x i8> [[TMP36]] to <16 x i32> +; THR15-NEXT: [[TMP38:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; THR15-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP13]], i64 0) +; THR15-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP38]], i64 4) +; THR15-NEXT: [[TMP41:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP147]], i64 8) +; THR15-NEXT: [[TMP42:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP41]], <4 x i8> [[TMP153]], i64 12) +; THR15-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i32> +; THR15-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP45:%.*]] = sub <16 x i32> [[TMP37]], [[TMP44]] +; THR15-NEXT: [[TMP46:%.*]] = shl <16 x i32> [[TMP45]], splat (i32 16) +; THR15-NEXT: [[TMP47:%.*]] = add <16 x i32> [[TMP46]], [[TMP27]] +; THR15-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP49:%.*]] = add <16 x i32> [[TMP47]], [[TMP70]] +; THR15-NEXT: [[TMP50:%.*]] = sub <16 x i32> [[TMP47]], [[TMP70]] +; THR15-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> +; THR15-NEXT: 
[[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP53:%.*]] = add <16 x i32> [[TMP51]], [[TMP52]] +; THR15-NEXT: [[TMP54:%.*]] = sub <16 x i32> [[TMP51]], [[TMP52]] +; THR15-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> +; THR15-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP57:%.*]] = sub <16 x i32> [[TMP55]], [[TMP56]] +; THR15-NEXT: [[TMP58:%.*]] = add <16 x i32> [[TMP55]], [[TMP56]] +; THR15-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> +; THR15-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP61:%.*]] = add <16 x i32> [[TMP59]], [[TMP60]] +; THR15-NEXT: [[TMP62:%.*]] = sub <16 x i32> [[TMP59]], [[TMP60]] +; THR15-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> +; THR15-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP19]], <16 x i32> +; THR15-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15) +; THR15-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537) +; THR15-NEXT: [[TMP67:%.*]] = mul <16 x i32> [[TMP66]], splat (i32 65535) +; THR15-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP63]] +; THR15-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP64]] +; THR15-NEXT: [[ADD113_3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) ; THR15-NEXT: ret i32 [[ADD113_3]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 5b0f4a69de4c3..7723746dda301 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -342,8 +342,8 @@ define void @reduce_or_2() { ; ZVFHMIN-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 ; 
ZVFHMIN-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer ; ZVFHMIN-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] -; ZVFHMIN-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVFHMIN-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] +; ZVFHMIN-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) +; ZVFHMIN-NEXT: br i1 [[TMP6]], label [[TMP8:%.*]], label [[TMP7:%.*]] ; ZVFHMIN: 7: ; ZVFHMIN-NEXT: ret void ; ZVFHMIN: 8: @@ -356,8 +356,8 @@ define void @reduce_or_2() { ; ZVL128-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 ; ZVL128-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer ; ZVL128-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] -; ZVL128-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVL128-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] +; ZVL128-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) +; ZVL128-NEXT: br i1 [[TMP6]], label [[TMP8:%.*]], label [[TMP7:%.*]] ; ZVL128: 7: ; ZVL128-NEXT: ret void ; ZVL128: 8: @@ -365,16 +365,14 @@ define void @reduce_or_2() { ; ; ZVL256-LABEL: @reduce_or_2( ; ZVL256-NEXT: [[TMP1:%.*]] = shl i64 0, 0 -; ZVL256-NEXT: [[TMP2:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 15 -; ZVL256-NEXT: [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer -; ZVL256-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 -; ZVL256-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer -; ZVL256-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] -; ZVL256-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVL256-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] -; ZVL256: 7: +; ZVL256-NEXT: [[TMP2:%.*]] = insertelement <32 x i64> , i64 [[TMP1]], i32 15 +; ZVL256-NEXT: [[TMP3:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x 
i64> poison, <32 x i32> +; ZVL256-NEXT: [[TMP4:%.*]] = icmp ult <32 x i64> [[TMP3]], zeroinitializer +; ZVL256-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP4]]) +; ZVL256-NEXT: br i1 [[TMP5]], label [[TMP7:%.*]], label [[TMP6:%.*]] +; ZVL256: 6: ; ZVL256-NEXT: ret void -; ZVL256: 8: +; ZVL256: 7: ; ZVL256-NEXT: ret void ; ; ZVL512-LABEL: @reduce_or_2( diff --git a/llvm/test/Transforms/SandboxVectorizer/allow_files.ll b/llvm/test/Transforms/SandboxVectorizer/allow_files.ll new file mode 100644 index 0000000000000..0929eca6a1047 --- /dev/null +++ b/llvm/test/Transforms/SandboxVectorizer/allow_files.ll @@ -0,0 +1,39 @@ +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="some_other_file" %s -S | FileCheck %s --check-prefix=ALLOW_OTHER +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="allow_files.ll" %s -S | FileCheck %s --check-prefix=ALLOW_THIS +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="al.*_files.ll" %s -S | FileCheck %s --check-prefix=ALLOW_REGEX +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="some_file,.*_files.ll,some_other_file" %s -S | FileCheck %s --check-prefix=ALLOW_REGEX_CSV +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="allow" %s -S | FileCheck %s --check-prefix=ALLOW_BAD_REGEX +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="some_file,some_other_file1,some_other_file2" %s -S | FileCheck %s --check-prefix=ALLOW_OTHER_CSV +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 
-sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="" %s -S | FileCheck %s --check-prefix=ALLOW_EMPTY +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s --check-prefix=DEFAULT + +; Checks the command-line option `-sbvec-allow-files`. +define void @widen(ptr %ptr) { +; ALLOW_OTHER: store float {{%.*}}, ptr {{%.*}}, align 4 +; ALLOW_OTHER: store float {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_THIS: store <2 x float> {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_REGEX: store <2 x float> {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_REGEX_CSV: store <2 x float> {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_BAD_REGEX: store float {{%.*}}, ptr {{%.*}}, align 4 +; ALLOW_BAD_REGEX: store float {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_OTHER_CSV: store float {{%.*}}, ptr {{%.*}}, align 4 +; ALLOW_OTHER_CSV: store float {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_EMPTY: store float {{%.*}}, ptr {{%.*}}, align 4 +; ALLOW_EMPTY: store float {{%.*}}, ptr {{%.*}}, align 4 +; +; DEFAULT: store <2 x float> {{%.*}}, ptr {{%.*}}, align 4 +; + %ptr0 = getelementptr float, ptr %ptr, i32 0 + %ptr1 = getelementptr float, ptr %ptr, i32 1 + %ld0 = load float, ptr %ptr0 + %ld1 = load float, ptr %ptr1 + store float %ld0, ptr %ptr0 + store float %ld1, ptr %ptr1 + ret void +} diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s index 0d14a0f734bdc..83de52b3c2cbc 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s @@ -17,8 +17,6 @@ vsetvli zero, zero, e8, m4, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e8, m8, tu, mu vdiv.vv v8, v16, v24 -vsetvli zero, zero, e16, mf8, tu, mu -vdiv.vv v8, v16, v24 vsetvli zero, zero, e16, mf4, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e16, mf2, tu, mu @@ -33,10 +31,6 @@ vsetvli zero, zero, e16, m4, tu, mu vdiv.vv v8, v16, v24 vsetvli 
zero, zero, e16, m8, tu, mu vdiv.vv v8, v16, v24 -vsetvli zero, zero, e32, mf8, tu, mu -vdiv.vv v8, v16, v24 -vsetvli zero, zero, e32, mf4, tu, mu -vdiv.vv v8, v16, v24 vsetvli zero, zero, e32, mf2, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e32, m1, tu, mu @@ -49,12 +43,6 @@ vsetvli zero, zero, e32, m4, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e32, m8, tu, mu vdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf8, tu, mu -vdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf4, tu, mu -vdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf2, tu, mu -vdiv.vv v8, v16, v24 vsetvli zero, zero, e64, m1, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e64, m1, tu, mu @@ -82,8 +70,6 @@ vsetvli zero, zero, e8, m4, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e8, m8, tu, mu vdiv.vx v8, v16, a0 -vsetvli zero, zero, e16, mf8, tu, mu -vdiv.vx v8, v16, a0 vsetvli zero, zero, e16, mf4, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e16, mf2, tu, mu @@ -98,10 +84,6 @@ vsetvli zero, zero, e16, m4, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e16, m8, tu, mu vdiv.vx v8, v16, a0 -vsetvli zero, zero, e32, mf8, tu, mu -vdiv.vx v8, v16, a0 -vsetvli zero, zero, e32, mf4, tu, mu -vdiv.vx v8, v16, a0 vsetvli zero, zero, e32, mf2, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e32, m1, tu, mu @@ -114,12 +96,6 @@ vsetvli zero, zero, e32, m4, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e32, m8, tu, mu vdiv.vx v8, v16, a0 -vsetvli zero, zero, e64, mf8, tu, mu -vdiv.vx v8, v16, a0 -vsetvli zero, zero, e64, mf4, tu, mu -vdiv.vx v8, v16, a0 -vsetvli zero, zero, e64, mf2, tu, mu -vdiv.vx v8, v16, a0 vsetvli zero, zero, e64, m1, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e64, m1, tu, mu @@ -147,8 +123,6 @@ vsetvli zero, zero, e8, m4, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e8, m8, tu, mu vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e16, mf8, tu, mu -vfdiv.vv v8, v16, v24 vsetvli zero, zero, e16, mf4, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e16, mf2, tu, mu @@ -163,10 
+137,6 @@ vsetvli zero, zero, e16, m4, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e16, m8, tu, mu vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e32, mf8, tu, mu -vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e32, mf4, tu, mu -vfdiv.vv v8, v16, v24 vsetvli zero, zero, e32, mf2, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e32, m1, tu, mu @@ -179,12 +149,6 @@ vsetvli zero, zero, e32, m4, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e32, m8, tu, mu vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf8, tu, mu -vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf4, tu, mu -vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf2, tu, mu -vfdiv.vv v8, v16, v24 vsetvli zero, zero, e64, m1, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e64, m1, tu, mu @@ -212,8 +176,6 @@ vsetvli zero, zero, e8, m4, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e8, m8, tu, mu vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e16, mf8, tu, mu -vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e16, mf4, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e16, mf2, tu, mu @@ -228,10 +190,6 @@ vsetvli zero, zero, e16, m4, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e16, m8, tu, mu vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e32, mf8, tu, mu -vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e32, mf4, tu, mu -vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e32, mf2, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e32, m1, tu, mu @@ -244,12 +202,6 @@ vsetvli zero, zero, e32, m4, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e32, m8, tu, mu vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e64, mf8, tu, mu -vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e64, mf4, tu, mu -vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e64, mf2, tu, mu -vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e64, m1, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e64, m1, tu, mu @@ -277,8 +229,6 @@ vsetvli zero, zero, e8, m4, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e8, m8, tu, mu vfsqrt.v v8, v16 -vsetvli zero, zero, e16, mf8, tu, mu -vfsqrt.v v8, v16 
vsetvli zero, zero, e16, mf4, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e16, mf2, tu, mu @@ -293,10 +243,6 @@ vsetvli zero, zero, e16, m4, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e16, m8, tu, mu vfsqrt.v v8, v16 -vsetvli zero, zero, e32, mf8, tu, mu -vfsqrt.v v8, v16 -vsetvli zero, zero, e32, mf4, tu, mu -vfsqrt.v v8, v16 vsetvli zero, zero, e32, mf2, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e32, m1, tu, mu @@ -309,12 +255,6 @@ vsetvli zero, zero, e32, m4, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e32, m8, tu, mu vfsqrt.v v8, v16 -vsetvli zero, zero, e64, mf8, tu, mu -vfsqrt.v v8, v16 -vsetvli zero, zero, e64, mf4, tu, mu -vfsqrt.v v8, v16 -vsetvli zero, zero, e64, mf2, tu, mu -vfsqrt.v v8, v16 vsetvli zero, zero, e64, m1, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e64, m1, tu, mu @@ -327,14 +267,14 @@ vsetvli zero, zero, e64, m8, tu, mu vfsqrt.v v8, v16 # CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 320 -# CHECK-NEXT: Total Cycles: 14397 -# CHECK-NEXT: Total uOps: 320 +# CHECK-NEXT: Instructions: 260 +# CHECK-NEXT: Total Cycles: 10243 +# CHECK-NEXT: Total uOps: 260 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.02 -# CHECK-NEXT: IPC: 0.02 -# CHECK-NEXT: Block RThroughput: 14361.0 +# CHECK-NEXT: uOps Per Cycle: 0.03 +# CHECK-NEXT: IPC: 0.03 +# CHECK-NEXT: Block RThroughput: 10185.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -361,8 +301,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 204 204.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 45 45.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu @@ -377,10 +315,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 180 180.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, 
tu, mu # CHECK-NEXT: 1 360 360.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 42 42.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu @@ -393,12 +327,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 168 168.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 336 336.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: 1 72 72.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu @@ -425,8 +353,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 204 204.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 45 45.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu @@ -441,10 +367,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 180 180.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: 1 360 360.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: 
1 408 408.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 42 42.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu @@ -457,12 +379,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 168 168.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 336 336.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: 1 72 72.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu @@ -489,8 +405,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 29 29.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu @@ -505,10 +419,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 116 116.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 25 25.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu @@ -521,12 +431,6 @@ vfsqrt.v v8, 
v16 # CHECK-NEXT: 1 100 100.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 200 200.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: 1 37 37.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu @@ -553,8 +457,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 29 29.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu @@ -569,10 +471,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 116 116.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 25 25.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu @@ -585,12 +483,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 100 100.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 200 200.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, 
mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: 1 37 37.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu @@ -617,8 +509,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 29 29.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu @@ -633,10 +523,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 116 116.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 25 25.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu @@ -649,12 +535,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 100 100.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 200 200.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 # CHECK-NEXT: 
1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: 1 37 37.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu @@ -686,7 +566,7 @@ vfsqrt.v v8, v16 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 160.00 - - - - - 12186.00 - 725.00 14361.00 - - +# CHECK-NEXT: - - - - 130.00 - - - - - 7290.00 - 485.00 10185.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: @@ -706,8 +586,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 204.00 - 4.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: - - - - - - - - - - 45.00 - 1.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu @@ -722,10 +600,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 180.00 - 4.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 360.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - 
42.00 - 1.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu @@ -738,12 +612,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 168.00 - 4.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 336.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - 72.00 - 1.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu @@ -770,8 +638,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 204.00 - 4.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: - - - - - - - - - - 45.00 - 1.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu @@ -786,10 +652,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 180.00 - 4.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, 
tu, mu # CHECK-NEXT: - - - - - - - - - - 360.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - 42.00 - 1.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu @@ -802,12 +664,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 168.00 - 4.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 336.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - 72.00 - 1.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu @@ -834,8 +690,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, 
mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 29.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu @@ -850,10 +704,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 116.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 25.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu @@ -866,12 +716,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 100.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 200.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - 
- vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 37.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu @@ -898,8 +742,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 29.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu @@ -914,10 +756,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 116.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 25.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu @@ -930,12 +768,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 100.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 200.00 - - vfdiv.vf v8, 
v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 37.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu @@ -962,8 +794,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 29.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu @@ -978,10 +808,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 116.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 # 
CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 25.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu @@ -994,12 +820,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 100.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 200.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 37.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu diff --git a/llvm/test/tools/llvm-objdump/ELF/private-headers.test b/llvm/test/tools/llvm-objdump/ELF/private-headers.test index c90cf59f4ed7d..157e2a2ea0490 100644 --- a/llvm/test/tools/llvm-objdump/ELF/private-headers.test +++ b/llvm/test/tools/llvm-objdump/ELF/private-headers.test @@ -38,6 +38,7 @@ Sections: Value: 0x0 - Name: .gnu.version_d Type: SHT_GNU_verdef + AddressAlign: 4 Entries: - Version: 1 Flags: 1 diff --git a/llvm/test/tools/llvm-objdump/ELF/verdef-invalid.test b/llvm/test/tools/llvm-objdump/ELF/verdef-invalid.test new file mode 100644 index 0000000000000..45f2331eadc34 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/verdef-invalid.test @@ -0,0 +1,77 @@ +## Adapted from test/llvm-readobj/ELF/verdef-invalid.test +## Check that we report a warning 
when a SHT_GNU_verdef section contains a version definition +## that refers to an auxiliary entry that goes past the end of the section. + +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump -p %t 2>&1 | FileCheck %s --check-prefix=AUX-PAST-END -DFILE=%t + +# AUX-PAST-END: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: version definition 1 refers to an auxiliary entry that goes past the end of the section + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Entries: + - Names: + - FOO + ## The correct sh_size is 28. + ShSize: 27 +DynamicSymbols: + - Name: foo + +## Check we report a warning when a version definition is not correctly aligned in memory. + +# RUN: yaml2obj %s --docnum=2 -o %t2 +# RUN: llvm-objdump -p %t2 2>&1 | FileCheck %s --check-prefix=MISALIGNED-DEF -DFILE=%t2 + +# MISALIGNED-DEF: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: found a misaligned version definition entry at offset 0x0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Type: Fill + Size: 0x1 + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x1 + Entries: + - Names: + - FOO +DynamicSymbols: + - Name: foo + +## Check we report "invalid vda_name" when vda_name = size(.dynstr) + +# RUN: yaml2obj %s --docnum=3 -o %t3 +# RUN: llvm-objdump -p %t3 2>&1 | FileCheck %s --check-prefix=VDANAME-PAST-END --implicit-check-not=warning: + +# VDANAME-PAST-END: Version definitions: +# VDANAME-PAST-END-NEXT: 0 0x00 0x00000000 V0 +# VDANAME-PAST-END-NEXT: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Link: .dynstr + Info: 0x1 +## The byte offset to the auxiliary entry is 0x13, i.e. it is not correctly aligned in memory. 
+ Content: "010000000000020000000000140000000000000004000000080000000700000000000000" +DynamicSymbols: + - Name: V1 + Binding: STB_GLOBAL + - Name: V0 + Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-objdump/ELF/verdef.test b/llvm/test/tools/llvm-objdump/ELF/verdef.test index e4ae33853deb4..dbb10bf87cbea 100644 --- a/llvm/test/tools/llvm-objdump/ELF/verdef.test +++ b/llvm/test/tools/llvm-objdump/ELF/verdef.test @@ -1,12 +1,14 @@ # RUN: yaml2obj %s -o %t -# RUN: llvm-objdump -p %t | FileCheck --strict-whitespace %s +# RUN: llvm-objdump -p %t | FileCheck --match-full-lines --strict-whitespace %s -# CHECK: Dynamic Section: -# CHECK-EMPTY: -# CHECK-NEXT: Version definitions: -# CHECK-NEXT: 1 0x01 0x075bcd15 foo -# CHECK-NEXT: 2 0x02 0x3ade68b1 VERSION_1 -# CHECK-NEXT: VERSION_2 +# CHECK:Dynamic Section: +#CHECK-EMPTY: +# CHECK-NEXT:Version definitions: +# CHECK-NEXT:2 0x01 0x075bcd15 foo +# CHECK-NEXT:3 0x02 0x3ade68b1 VERSION_1 +# CHECK-NEXT: VERSION_2 +# CHECK-NEXT:4 0x00 0x0000007b VERSION_3 +# CHECK-NEXT: VERSION_4 VERSION_5 --- !ELF FileHeader: @@ -24,17 +26,25 @@ Sections: Entries: - Version: 1 Flags: 1 - VersionNdx: 1 + VersionNdx: 2 Hash: 123456789 Names: - foo - Version: 1 Flags: 2 - VersionNdx: 2 + VersionNdx: 3 Hash: 987654321 Names: - VERSION_1 - VERSION_2 + - Version: 1 + Flags: 0 + VersionNdx: 4 + Hash: 123 + Names: + - VERSION_3 + - VERSION_4 + - VERSION_5 DynamicSymbols: - Name: bar Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test b/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test index e8bd4d21f7429..e768e13f4a1ec 100644 --- a/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test +++ b/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test @@ -128,7 +128,8 @@ Sections: Entries: - Names: - FOO - ShSize: 21 + ## The correct sh_size is 28. 
+ ShSize: 27 DynamicSymbols: - Name: foo @@ -290,3 +291,36 @@ Sections: DynamicSymbols: - Name: foo Binding: STB_GLOBAL + +## Check we report "invalid vda_name" when vda_name = size(.dynstr) + +# RUN: yaml2obj %s --docnum=10 -o %t11 +# RUN: llvm-readobj -V %t11 2>&1 | FileCheck %s --check-prefix=VDANAME-PAST-END-LLVM -DFILE=%t11 --implicit-check-not=warning: +# RUN: llvm-readelf -V %t11 2>&1 | FileCheck %s --check-prefix=VDANAME-PAST-END-GNU -DFILE=%t11 --implicit-check-not=warning: + +# VDANAME-PAST-END-LLVM: Name: V0 +# VDANAME-PAST-END-LLVM-NEXT: Predecessors: [] + +# VDANAME-PAST-END-GNU: Version definition section '.gnu.version_d' contains 1 entries: +# VDANAME-PAST-END-GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 3 (.dynstr) +# VDANAME-PAST-END-GNU-NEXT: 0x0000: Rev: 1 Flags: none Index: 0 Cnt: 2 Name: V0 +# VDANAME-PAST-END-GNU-NEXT: 0x001c: Parent 1: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Link: .dynstr + Info: 0x1 +## The byte offset to the auxiliary entry is 0x13, i.e. it is not correctly aligned in memory. + Content: "010000000000020000000000140000000000000004000000080000000700000000000000" +DynamicSymbols: + - Name: V1 + Binding: STB_GLOBAL + - Name: V0 + Binding: STB_GLOBAL diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp index bce308c870ddf..b7899bc3bcbee 100644 --- a/llvm/tools/llvm-objdump/ELFDump.cpp +++ b/llvm/tools/llvm-objdump/ELFDump.cpp @@ -378,38 +378,6 @@ void ELFDumper::printSymbolVersionDependency( } } -template -static void printSymbolVersionDefinition(const typename ELFT::Shdr &Shdr, - ArrayRef Contents, - StringRef StrTab) { - outs() << "\nVersion definitions:\n"; - - const uint8_t *Buf = Contents.data(); - uint32_t VerdefIndex = 1; - // sh_info contains the number of entries in the SHT_GNU_verdef section. 
To - // make the index column have consistent width, we should insert blank spaces - // according to sh_info. - uint16_t VerdefIndexWidth = std::to_string(Shdr.sh_info).size(); - while (Buf) { - auto *Verdef = reinterpret_cast(Buf); - outs() << format_decimal(VerdefIndex++, VerdefIndexWidth) << " " - << format("0x%02" PRIx16 " ", (uint16_t)Verdef->vd_flags) - << format("0x%08" PRIx32 " ", (uint32_t)Verdef->vd_hash); - - const uint8_t *BufAux = Buf + Verdef->vd_aux; - uint16_t VerdauxIndex = 0; - while (BufAux) { - auto *Verdaux = reinterpret_cast(BufAux); - if (VerdauxIndex) - outs() << std::string(VerdefIndexWidth + 17, ' '); - outs() << StringRef(StrTab.drop_front(Verdaux->vda_name).data()) << '\n'; - BufAux = Verdaux->vda_next ? BufAux + Verdaux->vda_next : nullptr; - ++VerdauxIndex; - } - Buf = Verdef->vd_next ? Buf + Verdef->vd_next : nullptr; - } -} - template void ELFDumper::printSymbolVersion() { const ELFFile &Elf = getELFFile(); StringRef FileName = Obj.getFileName(); @@ -426,10 +394,26 @@ template void ELFDumper::printSymbolVersion() { unwrapOrError(Elf.getSection(Shdr.sh_link), FileName); StringRef StrTab = unwrapOrError(Elf.getStringTable(*StrTabSec), FileName); - if (Shdr.sh_type == ELF::SHT_GNU_verneed) + if (Shdr.sh_type == ELF::SHT_GNU_verneed) { printSymbolVersionDependency(Shdr); - else - printSymbolVersionDefinition(Shdr, Contents, StrTab); + } else { + OS << "\nVersion definitions:\n"; + Expected> V = + getELFFile().getVersionDefinitions(Shdr); + if (!V) { + this->reportUniqueWarning(V.takeError()); + continue; + } + for (const VerDef &Def : *V) { + OS << Def.Ndx << ' ' << format_hex(Def.Flags, 4) << ' ' + << format_hex(Def.Hash, 10) << ' ' << Def.Name << '\n'; + if (!Def.AuxV.empty()) { + for (auto [I, Aux] : enumerate(Def.AuxV)) + OS << (I ? 
' ' : '\t') << Aux.Name; + OS << '\n'; + } + } + } } } diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 99e0440dce78d..115f04a4df778 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -360,7 +360,7 @@ static StringRef ToolName; std::unique_ptr BIDFetcher; -Dumper::Dumper(const object::ObjectFile &O) : O(O) { +Dumper::Dumper(const object::ObjectFile &O) : O(O), OS(outs()) { WarningHandler = [this](const Twine &Msg) { if (Warnings.insert(Msg.str()).second) reportWarning(Msg, this->O.getFileName()); diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h index 7253cc3f4d91b..25d9c1e106a6c 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.h +++ b/llvm/tools/llvm-objdump/llvm-objdump.h @@ -77,6 +77,7 @@ class Dumper { StringSet<> Warnings; protected: + llvm::raw_ostream &OS; std::function WarningHandler; public: diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index fdae09ac767e6..e7825419ef9ec 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -7668,7 +7668,7 @@ void LLVMELFDumper::printVersionDefinitionSection(const Elf_Shdr *Sec) { W.printFlags("Flags", D.Flags, ArrayRef(SymVersionFlags)); W.printNumber("Index", D.Ndx); W.printNumber("Hash", D.Hash); - W.printString("Name", D.Name.c_str()); + W.printString("Name", D.Name); W.printList( "Predecessors", D.AuxV, [](raw_ostream &OS, const VerdAux &Aux) { OS << Aux.Name.c_str(); }); diff --git a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn index 19d2b5d27c33d..285f41ee70203 100644 --- a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn @@ -19,7 +19,6 @@ static_library("Passes") { "BinaryPasses.cpp", "CMOVConversion.cpp", "CacheMetrics.cpp", - "ContinuityStats.cpp", 
"DataflowAnalysis.cpp", "DataflowInfoManager.cpp", "FixRISCVCallsPass.cpp", @@ -41,6 +40,7 @@ static_library("Passes") { "PLTCall.cpp", "PatchEntries.cpp", "PettisAndHansen.cpp", + "ProfileQualityStats.cpp", "RegAnalysis.cpp", "RegReAssign.cpp", "ReorderAlgorithm.cpp", diff --git a/llvm/utils/gn/secondary/clang/unittests/Frontend/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Frontend/BUILD.gn index 7ea260f952618..931ca8b20abc5 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Frontend/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Frontend/BUILD.gn @@ -10,6 +10,7 @@ unittest("FrontendTests") { "//clang/lib/FrontendTool", "//clang/lib/Lex", "//clang/lib/Sema", + "//clang/lib/Tooling", "//llvm/lib/Support", "//llvm/lib/TargetParser", ] @@ -20,6 +21,7 @@ unittest("FrontendTests") { "CompilerInvocationTest.cpp", "FixedPointString.cpp", "FrontendActionTest.cpp", + "NoAlterCodeGenActionTest.cpp", "OutputStreamTest.cpp", "PCHPreambleTest.cpp", "ParsedSourceLocationTest.cpp", diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td index 23692478755c6..ce17ad9362227 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td @@ -148,13 +148,11 @@ def Tosa_TransConvOpQuantInfoBuilder : OpBuilder< "::mlir::Value":$weight, "mlir::Value":$bias, "::mlir::DenseI64ArrayAttr":$outpad, "::mlir::DenseI64ArrayAttr":$stride, - "::mlir::DenseI64ArrayAttr":$outputShape, "::mlir::TypeAttr":$acc_type), [{ buildTransConvOpWithQuantInfo($_builder, $_state, outputType, input, weight, bias, - outpad, stride, - outputShape, acc_type); + outpad, stride, acc_type); }]>; // The tosa.matmul op is also intended to be generated where a fully_connected diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index ddfec2c9bfcd3..f1a9d1fedac1b 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ 
b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -301,7 +301,7 @@ def Tosa_MatMulOp : Tosa_InferShapedTypeOp<"matmul"> { ); let results = (outs - Tosa_Tensor3D:$c + Tosa_Tensor3D:$output ); list availability = [ @@ -408,7 +408,6 @@ def Tosa_TransposeConv2DOp : Tosa_ConvOp<"transpose_conv2d"> { Tosa_IntArrayAttr4:$out_pad, Tosa_IntArrayAttr2:$stride, - Tosa_IntArrayAttr4:$out_shape, TypeAttrOf:$acc_type, DefaultValuedOptionalAttr:$local_bound ); diff --git a/mlir/include/mlir/IR/BuiltinTypes.td b/mlir/include/mlir/IR/BuiltinTypes.td index e5a2ae81da0c9..af474b3e3ec47 100644 --- a/mlir/include/mlir/IR/BuiltinTypes.td +++ b/mlir/include/mlir/IR/BuiltinTypes.td @@ -1035,6 +1035,17 @@ def Builtin_RankedTensor : Builtin_Type<"RankedTensor", "tensor", [ RankedTensorType clone(::mlir::Type elementType) { return ::llvm::cast(cloneWith(getShape(), elementType)); } + + /// Return a clone of this type without the encoding. + RankedTensorType dropEncoding() { + return RankedTensorType::get(getShape(), getElementType()); + } + + /// Return a clone of this type with the given new encoding and the same + /// shape and element type as this type. + RankedTensorType cloneWithEncoding(::mlir::Attribute encoding) { + return RankedTensorType::get(getShape(), getElementType(), encoding); + } }]; let skipDefaultBuilders = 1; let genVerifyDecl = 1; diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 06831a642664e..8732ddafa24d4 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -49,6 +49,11 @@ using namespace mlir::tosa; // calculated result based on whether the lhs or rhs is NaN or not. In pseudo // code: // +// In the case that the op is operating on non floating point types we ignore +// the attribute completely, this is consistent with the TOSA spec which has +// the following wording: "This attribute is ignored by non floating-point +// types." 
+// // binary(lhs, rhs): // result = op(lhs, rhs) // if lhs == NaN return rhs @@ -58,6 +63,10 @@ template static Value materializeBinaryNanCheckIfRequired(OpTy op, PatternRewriter &rewriter, Value lhs, Value rhs, Value result) { + // NaN propagation has no meaning for non floating point types. + if (!isa(getElementTypeOrSelf(lhs))) + return result; + auto nanMode = op.getNanMode(); if (nanMode == "PROPAGATE") return result; @@ -449,6 +458,11 @@ static Value createLinalgBodyCalculationForElementwiseOp( auto clampOp = llvm::cast(op); const auto nanMode = clampOp.getNanMode(); + + // NaN propagation has no meaning for non floating point types. + if (!isa(elementTy)) + return result; + // In the case of "PROPAGATE" semantics no compare and selection is // required. if (nanMode == "PROPAGATE") @@ -1192,7 +1206,8 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, bool isNanIgnoreMode = false; if constexpr (std::is_same_v || std::is_same_v) { - if (op.getNanMode() == "IGNORE") { + // NaN propagation has no meaning for non floating point types. + if (isa(elementTy) && op.getNanMode() == "IGNORE") { isNanIgnoreMode = true; // Because the TOSA spec requires the result be NaN iff all elements in // the reduction are NaN we can't simply perform a compare and select. @@ -2282,7 +2297,8 @@ class ArgMaxConverter : public OpRewritePattern { // In the case "IGNORE" we check if the current argument is NaN and // select the old index and value otherwise take the updated index and // value. - if (const auto nanMode = argmaxOp.getNanMode(); nanMode == "IGNORE") { + if (const auto nanMode = argmaxOp.getNanMode(); + isa(inElementTy) && nanMode == "IGNORE") { // Unordered comparison of NaN against itself will always return // true. 
Value isNaN = rewriter.create( diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp index 006e35806d64f..e3400b9ba4358 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp @@ -748,6 +748,11 @@ class MaxPool2dConverter : public OpConversionPattern { dilationAttr); rewriter.replaceOp(op, resultOp); + + // NaN propagation has no meaning for non floating point types. + if (!isa(getElementTypeOrSelf(inputTy))) + return success(); + // "PROPAGATE" mode matches the behaviour of the LinAlg named op, so no // compare and select materialization is required. // diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index 6833d6583c27a..a8c24e1423425 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -1830,14 +1830,14 @@ static void getMultiLevelStrides(const MemRefRegion ®ion, } } -/// Generates a point-wise copy from/to `memref' to/from `fastMemRef' and -/// returns the outermost AffineForOp of the copy loop nest. `lbMaps` and -/// `ubMaps` along with `lbOperands` and `ubOperands` hold the lower and upper -/// bound information for the copy loop nest. `fastBufOffsets` contain the -/// expressions to be subtracted out from the respective copy loop iterators in -/// order to index the fast buffer. If `copyOut' is true, generates a copy-out; -/// otherwise a copy-in. Builder `b` should be set to the point the copy nest is -/// inserted. +/// Generates a point-wise copy from/to a non-zero ranked `memref' to/from +/// `fastMemRef' and returns the outermost AffineForOp of the copy loop nest. +/// `lbMaps` and `ubMaps` along with `lbOperands` and `ubOperands` hold the +/// lower and upper bound information for the copy loop nest. 
`fastBufOffsets` +/// contain the expressions to be subtracted out from the respective copy loop +/// iterators in order to index the fast buffer. If `copyOut' is true, generates +/// a copy-out; otherwise a copy-in. Builder `b` should be set to the point the +/// copy nest is inserted. // /// The copy-in nest is generated as follows as an example for a 2-d region: /// for x = ... @@ -1858,6 +1858,8 @@ generatePointWiseCopy(Location loc, Value memref, Value fastMemRef, })); unsigned rank = cast(memref.getType()).getRank(); + // A copy nest can't be generated for 0-ranked memrefs. + assert(rank != 0 && "non-zero rank memref expected"); assert(lbMaps.size() == rank && "wrong number of lb maps"); assert(ubMaps.size() == rank && "wrong number of ub maps"); @@ -1921,19 +1923,20 @@ emitRemarkForBlock(Block &block) { return block.getParentOp()->emitRemark(); } -/// Creates a buffer in the faster memory space for the specified memref region; -/// generates a copy from the lower memory space to this one, and replaces all -/// loads/stores in the block range [`begin', `end') of `block' to load/store -/// from that buffer. Returns failure if copies could not be generated due to -/// yet unimplemented cases. `copyInPlacementStart` and `copyOutPlacementStart` -/// in copyPlacementBlock specify the insertion points where the incoming copies -/// and outgoing copies, respectively, should be inserted (the insertion happens -/// right before the insertion point). Since `begin` can itself be invalidated -/// due to the memref rewriting done from this method, the output argument -/// `nBegin` is set to its replacement (set to `begin` if no invalidation -/// happens). Since outgoing copies could have been inserted at `end`, the -/// output argument `nEnd` is set to the new end. `sizeInBytes` is set to the -/// size of the fast buffer allocated. 
+/// Creates a buffer in the faster memory space for the specified memref region +/// (memref has to be non-zero ranked); generates a copy from the lower memory +/// space to this one, and replaces all loads/stores in the block range +/// [`begin', `end') of `block' to load/store from that buffer. Returns failure +/// if copies could not be generated due to yet unimplemented cases. +/// `copyInPlacementStart` and `copyOutPlacementStart` in copyPlacementBlock +/// specify the insertion points where the incoming copies and outgoing copies, +/// respectively, should be inserted (the insertion happens right before the +/// insertion point). Since `begin` can itself be invalidated due to the memref +/// rewriting done from this method, the output argument `nBegin` is set to its +/// replacement (set to `begin` if no invalidation happens). Since outgoing +/// copies could have been inserted at `end`, the output argument `nEnd` is set +/// to the new end. `sizeInBytes` is set to the size of the fast buffer +/// allocated. static LogicalResult generateCopy( const MemRefRegion ®ion, Block *block, Block::iterator begin, Block::iterator end, Block *copyPlacementBlock, @@ -1984,6 +1987,11 @@ static LogicalResult generateCopy( SmallVector bufIndices; unsigned rank = memRefType.getRank(); + if (rank == 0) { + LLVM_DEBUG(llvm::dbgs() << "Non-zero ranked memrefs supported\n"); + return failure(); + } + SmallVector fastBufferShape; // Compute the extents of the buffer. diff --git a/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp b/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp index bb592c667549c..7b5350ca26b60 100644 --- a/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp +++ b/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp @@ -646,6 +646,11 @@ static LogicalResult convertRsqrtOp(math::RsqrtOp op, auto operand = op.getOperand(); auto operandTy = operand.getType(); + // Operand type must be static shaped type to create const float. 
+ auto shapedOperandType = dyn_cast(operandTy); + if (shapedOperandType && !shapedOperandType.hasStaticShape()) + return failure(); + auto eTy = getElementTypeOrSelf(operandTy); if (!isa(eTy)) return failure(); diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp index 81404fa664cd4..a9ba662348a52 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -337,14 +337,12 @@ struct ExpandShapeOpInterface if (failed(buffer)) return failure(); - // Memref result type is inferred by the builder based on reassociation - // indices and result shape. - // TODO: Instead of inferring the output shape argument of - // memref.expand_shape op, use output_shape argument of tensor.expand_shape - // op. - replaceOpWithNewBufferizedOp( - rewriter, op, tensorResultType.getShape(), *buffer, - expandShapeOp.getReassociationIndices()); + auto memrefExpandShape = rewriter.create( + op->getLoc(), tensorResultType.getShape(), *buffer, + expandShapeOp.getReassociationIndices(), + expandShapeOp.getMixedOutputShape()); + replaceOpWithBufferizedValues(rewriter, op, + memrefExpandShape->getResults()); return success(); } }; diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 7b50eceb081dd..54f9fa917f2e0 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -569,15 +569,15 @@ static void buildConvOpWithQuantInfo(OpBuilder &builder, OperationState &result, /// Handles tosa.transpose_conv2d which has outpad and output shape /// attributes. 
-static void buildTransConvOpWithQuantInfo( - OpBuilder &builder, OperationState &result, Type outputType, Value input, - Value weight, Value bias, DenseI64ArrayAttr outpad, - DenseI64ArrayAttr stride, DenseI64ArrayAttr outputShape, TypeAttr accType) { +static void +buildTransConvOpWithQuantInfo(OpBuilder &builder, OperationState &result, + Type outputType, Value input, Value weight, + Value bias, DenseI64ArrayAttr outpad, + DenseI64ArrayAttr stride, TypeAttr accType) { auto zps = createZPsAsConst(builder, input, weight); result.addOperands({input, weight, bias, zps.first, zps.second}); result.addAttribute("out_pad", outpad); result.addAttribute("stride", stride); - result.addAttribute("out_shape", outputShape); result.addAttribute("acc_type", accType); Type finalOutputType = outputType; auto quantAttr = buildConvOpQuantizationAttr(builder, input, weight); @@ -2327,9 +2327,7 @@ LogicalResult TransposeConv2DOp::inferReturnTypeComponents( MLIRContext *context, ::std::optional location, TransposeConv2DOp::Adaptor adaptor, SmallVectorImpl &inferredReturnShapes) { - // outputShape is mutable. 
- llvm::SmallVector outputShape = - convertToMlirShape(adaptor.getOutShape()); + llvm::SmallVector outputShape(4, ShapedType::kDynamic); int64_t inputWidth = ShapedType::kDynamic; int64_t inputHeight = ShapedType::kDynamic; diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index abff252575eb0..b00820ffc542b 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -613,8 +613,10 @@ static LogicalResult printOperation(CppEmitter &emitter, Block &trueSuccessor = *condBranchOp.getTrueDest(); Block &falseSuccessor = *condBranchOp.getFalseDest(); - os << "if (" << emitter.getOrCreateName(condBranchOp.getCondition()) - << ") {\n"; + os << "if ("; + if (failed(emitter.emitOperand(condBranchOp.getCondition()))) + return failure(); + os << ") {\n"; os.indent(); diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index 8445e609c2244..7ea82f61fadbb 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -1071,11 +1071,21 @@ LogicalResult ModuleImport::convertGlobalCtorsAndDtors(llvm::GlobalVariable *globalVar) { if (!globalVar->hasInitializer() || !globalVar->hasAppendingLinkage()) return failure(); - auto *initializer = - dyn_cast(globalVar->getInitializer()); - if (!initializer) + llvm::Constant *initializer = globalVar->getInitializer(); + + bool knownInit = isa(initializer) || + isa(initializer); + if (!knownInit) return failure(); + // ConstantAggregateZero does not engage with the operand initialization + // in the loop that follows - there should be no operands. This implies + // empty ctor/dtor lists. 
+ if (auto *caz = dyn_cast(initializer)) { + if (caz->getElementCount().getFixedValue() != 0) + return failure(); + } + SmallVector funcs; SmallVector priorities; for (llvm::Value *operand : initializer->operands()) { diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 5cd841ee2df91..eda6b51ff45ea 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -1258,16 +1258,35 @@ LogicalResult ModuleTranslation::convertGlobalsAndAliases() { auto dtorOp = dyn_cast(op); if (!ctorOp && !dtorOp) continue; - auto range = ctorOp ? llvm::zip(ctorOp.getCtors(), ctorOp.getPriorities()) - : llvm::zip(dtorOp.getDtors(), dtorOp.getPriorities()); - auto appendGlobalFn = - ctorOp ? llvm::appendToGlobalCtors : llvm::appendToGlobalDtors; - for (auto symbolAndPriority : range) { - llvm::Function *f = lookupFunction( - cast(std::get<0>(symbolAndPriority)).getValue()); - appendGlobalFn(*llvmModule, f, - cast(std::get<1>(symbolAndPriority)).getInt(), - /*Data=*/nullptr); + + // The empty / zero initialized version of llvm.global_(c|d)tors cannot be + // handled by appendGlobalFn logic below, which just ignores empty (c|d)tor + // lists. Make sure it gets emitted. + if ((ctorOp && ctorOp.getCtors().empty()) || + (dtorOp && dtorOp.getDtors().empty())) { + llvm::IRBuilder builder( + llvmModule->getContext(), + llvm::TargetFolder(llvmModule->getDataLayout())); + llvm::Type *eltTy = llvm::StructType::get( + builder.getInt32Ty(), builder.getPtrTy(), builder.getPtrTy()); + llvm::ArrayType *at = llvm::ArrayType::get(eltTy, 0); + llvm::Constant *zeroInit = llvm::Constant::getNullValue(at); + (void)new llvm::GlobalVariable( + *llvmModule, zeroInit->getType(), false, + llvm::GlobalValue::AppendingLinkage, zeroInit, + ctorOp ? "llvm.global_ctors" : "llvm.global_dtors"); + } else { + auto range = ctorOp + ? 
llvm::zip(ctorOp.getCtors(), ctorOp.getPriorities()) + : llvm::zip(dtorOp.getDtors(), dtorOp.getPriorities()); + auto appendGlobalFn = + ctorOp ? llvm::appendToGlobalCtors : llvm::appendToGlobalDtors; + for (const auto &[sym, prio] : range) { + llvm::Function *f = + lookupFunction(cast(sym).getValue()); + appendGlobalFn(*llvmModule, f, cast(prio).getInt(), + /*Data=*/nullptr); + } } } diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir index 332b706871547..02d2f16b74ef8 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -940,6 +940,16 @@ func.func @max_pool2d_nan_propagate(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x4 // ----- +// CHECK-LABEL: @max_pool2d_nan_ignore_int +func.func @max_pool2d_nan_ignore_int(%arg0: tensor<1x6x34x62xi8>) -> (tensor<1x4x32x62xi8>) { + // CHECK: linalg.pooling_nhwc_max + // CHECK-NOT: linalg.generic + %0 = tosa.max_pool2d %arg0 {pad = array, kernel = array, stride = array, nan_mode = "IGNORE"} : (tensor<1x6x34x62xi8>) -> tensor<1x4x32x62xi8> + return %0: tensor<1x4x32x62xi8> +} + +// ----- + // CHECK-LABEL: @max_pool2d_nan_ignore func.func @max_pool2d_nan_ignore(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) { // CHECK-NOT: linalg.pooling_nhwc_max diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index 78f2e173d7cb1..c3992d2cda46e 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -2033,6 +2033,44 @@ func.func @reduce_max_nan_propagate(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf3 // ----- +// CHECK-LABEL: @reduce_min_nan_ignore_int +func.func @reduce_min_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.reduce + // CHECK: arith.minsi + // CHECK-NOT: arith.cmpf uno + // 
CHECK-NOT: arith.select + // CHECK: linalg.yield + // CHECK-NOT: arith.constant 0x7FC00000 + // CHECK-NOT: tensor.empty() + // CHECK-NOT: linalg.fill + // CHECK-NOT: tensor.empty() + // CHECK-NOT: select + // CHECK: return + %5 = tosa.reduce_min %arg0 {axis = 0 : i32, nan_mode = "IGNORE"} : (tensor<5x4xi8>) -> tensor<1x4xi8> + return +} + +// ----- + +// CHECK-LABEL: @reduce_max_nan_ignore_int +func.func @reduce_max_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.reduce + // CHECK: arith.maxsi + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK: linalg.yield + // CHECK-NOT: arith.constant 0x7FC00000 + // CHECK-NOT: tensor.empty() + // CHECK-NOT: linalg.fill + // CHECK-NOT: tensor.empty() + // CHECK-NOT: select + // CHECK: return + %6 = tosa.reduce_max %arg0 {axis = 0 : i32, nan_mode = "IGNORE"} : (tensor<5x4xi8>) -> tensor<1x4xi8> + return +} + +// ----- + // CHECK-LABEL: @reduce_min_nan_ignore func.func @reduce_min_nan_ignore(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) -> () { // CHECK: linalg.reduce @@ -2095,6 +2133,32 @@ func.func @maximum_nan_propagate(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) // ----- +// CHECK-LABEL: @minimum_nan_ignore_int +func.func @minimum_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.generic + // CHECK: arith.minsi + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK: linalg.yield + %9 = tosa.minimum %arg0, %arg1 {nan_mode = "IGNORE"} : (tensor<5x4xi8>, tensor<5x4xi8>) -> tensor<5x4xi8> + return +} + +// ----- + +// CHECK-LABEL: @maximum_nan_ignore_int +func.func @maximum_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.generic + // CHECK: arith.maxsi + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK: linalg.yield + %10 = tosa.maximum %arg0, %arg1 {nan_mode = "IGNORE"} : (tensor<5x4xi8>, tensor<5x4xi8>) -> tensor<5x4xi8> + return +} + +// ----- + // 
CHECK-LABEL: @minimum_nan_ignore func.func @minimum_nan_ignore(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) -> () { // CHECK: linalg.generic @@ -2142,6 +2206,23 @@ func.func @argmax_nan_propagate(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) // ----- +// CHECK-LABEL: @argmax_nan_ignore_int +func.func @argmax_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.generic + // CHECK: arith.cmpi sgt + // CHECK: arith.select + // CHECK: arith.select + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK-NOT: arith.select + // CHECK: linalg.yield + %12 = tosa.argmax %arg0 {axis = 0 : i32, nan_mode = "IGNORE"} : (tensor<5x4xi8>) -> tensor<4xi32> + return +} + +// ----- + // CHECK-LABEL: @argmax_nan_ignore func.func @argmax_nan_ignore(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) -> () { // CHECK: linalg.generic @@ -2172,6 +2253,20 @@ func.func @clamp_nan_propagate(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) - // ----- +// CHECK-LABEL: @clamp_nan_ignore_int +func.func @clamp_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.generic + // CHECK: arith.maxsi + // CHECK: arith.minsi + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK: linalg.yield + %14 = tosa.clamp %arg0 {min_val = 1 : i8, max_val = 5 : i8, nan_mode = "IGNORE"} : (tensor<5x4xi8>) -> tensor<5x4xi8> + return +} + +// ----- + // CHECK-LABEL: @clamp_nan_ignore func.func @clamp_nan_ignore(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) -> () { // CHECK: linalg.generic diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir index 5615acae5ecc4..26eef0a7925a7 100644 --- a/mlir/test/Dialect/Affine/affine-data-copy.mlir +++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir @@ -354,3 +354,68 @@ func.func @arbitrary_memory_space() { } return } + +// CHECK-LABEL: zero_ranked +func.func @zero_ranked(%3:memref<480xi1>) { + 
%false = arith.constant false + %4 = memref.alloc() {alignment = 128 : i64} : memref + affine.store %false, %4[] : memref + %5 = memref.alloc() {alignment = 128 : i64} : memref + memref.copy %4, %5 : memref to memref + affine.for %arg0 = 0 to 480 { + %11 = affine.load %3[%arg0] : memref<480xi1> + %12 = affine.load %5[] : memref + %13 = arith.cmpi slt, %11, %12 : i1 + %14 = arith.select %13, %11, %12 : i1 + affine.store %14, %5[] : memref + } + return +} + +// CHECK-LABEL: func @scalar_memref_copy_without_dma +func.func @scalar_memref_copy_without_dma() { + %false = arith.constant false + %4 = memref.alloc() {alignment = 128 : i64} : memref + affine.store %false, %4[] : memref + + // CHECK: %[[FALSE:.*]] = arith.constant false + // CHECK: %[[MEMREF:.*]] = memref.alloc() {alignment = 128 : i64} : memref + // CHECK: affine.store %[[FALSE]], %[[MEMREF]][] : memref + return +} + +// CHECK-LABEL: func @scalar_memref_copy_in_loop +func.func @scalar_memref_copy_in_loop(%3:memref<480xi1>) { + %false = arith.constant false + %4 = memref.alloc() {alignment = 128 : i64} : memref + affine.store %false, %4[] : memref + %5 = memref.alloc() {alignment = 128 : i64} : memref + memref.copy %4, %5 : memref to memref + affine.for %arg0 = 0 to 480 { + %11 = affine.load %3[%arg0] : memref<480xi1> + %12 = affine.load %5[] : memref + %13 = arith.cmpi slt, %11, %12 : i1 + %14 = arith.select %13, %11, %12 : i1 + affine.store %14, %5[] : memref + } + + // CHECK: %[[FALSE:.*]] = arith.constant false + // CHECK: %[[MEMREF:.*]] = memref.alloc() {alignment = 128 : i64} : memref + // CHECK: affine.store %[[FALSE]], %[[MEMREF]][] : memref + // CHECK: %[[TARGET:.*]] = memref.alloc() {alignment = 128 : i64} : memref + // CHECK: memref.copy %alloc, %[[TARGET]] : memref to memref + // CHECK: %[[FAST_MEMREF:.*]] = memref.alloc() : memref<480xi1> + // CHECK: affine.for %{{.*}} = 0 to 480 { + // CHECK: %{{.*}} = affine.load %arg0[%{{.*}}] : memref<480xi1> + // CHECK: affine.store %{{.*}}, 
%[[FAST_MEMREF]][%{{.*}}] : memref<480xi1> + // CHECK: } + // CHECK: affine.for %arg1 = 0 to 480 { + // CHECK: %[[L0:.*]] = affine.load %[[FAST_MEMREF]][%arg1] : memref<480xi1> + // CHECK: %[[L1:.*]] = affine.load %[[TARGET]][] : memref + // CHECK: %[[CMPI:.*]] = arith.cmpi slt, %[[L0]], %[[L1]] : i1 + // CHECK: %[[SELECT:.*]] = arith.select %[[CMPI]], %[[L0]], %[[L1]] : i1 + // CHECK: affine.store %[[SELECT]], %[[TARGET]][] : memref + // CHECK: } + // CHECK: memref.dealloc %[[FAST_MEMREF]] : memref<480xi1> + return +} diff --git a/mlir/test/Dialect/LLVMIR/global.mlir b/mlir/test/Dialect/LLVMIR/global.mlir index 79d1cafabfbed..bd3584de9a405 100644 --- a/mlir/test/Dialect/LLVMIR/global.mlir +++ b/mlir/test/Dialect/LLVMIR/global.mlir @@ -233,6 +233,14 @@ llvm.mlir.global_ctors { ctors = [@ctor], priorities = [0 : i32]} // ----- +// CHECK: llvm.mlir.global_ctors {ctors = [], priorities = []} +llvm.mlir.global_ctors {ctors = [], priorities = []} + +// CHECK: llvm.mlir.global_dtors {dtors = [], priorities = []} +llvm.mlir.global_dtors {dtors = [], priorities = []} + +// ----- + llvm.func @dtor() { llvm.return } diff --git a/mlir/test/Dialect/Math/expand-math.mlir b/mlir/test/Dialect/Math/expand-math.mlir index 946a411e4cc4b..1420acaa40d35 100644 --- a/mlir/test/Dialect/Math/expand-math.mlir +++ b/mlir/test/Dialect/Math/expand-math.mlir @@ -787,3 +787,29 @@ func.func @unranked_ceil_op(%arg: tensor<*xf32>) -> tensor<*xf32>{ %a = math.ceil %arg : tensor<*xf32> return %a: tensor<*xf32> } + +// ----- + +// CHECK-LABEL: func.func @non_static_shape_rsqrt_op +// CHECK-SAME: (%[[ARG:.*]]: tensor) +// CHECK-SAME: -> tensor +// CHECK: %[[RSQRT:.*]] = math.rsqrt %[[ARG]] : tensor +// CHECK: return %[[RSQRT]] : tensor + +func.func @non_static_shape_rsqrt_op(%arg: tensor) -> tensor{ + %a = math.rsqrt %arg : tensor + return %a: tensor +} + +// ----- + +// CHECK-LABEL: func.func @unranked_rsqrt_op +// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) +// CHECK-SAME: -> tensor<*xf32> +// CHECK: 
%[[RSQRT:.*]] = math.rsqrt %[[ARG]] : tensor<*xf32> +// CHECK: return %[[RSQRT]] : tensor<*xf32> + +func.func @unranked_rsqrt_op(%arg: tensor<*xf32>) -> tensor<*xf32>{ + %a = math.rsqrt %arg : tensor<*xf32> + return %a: tensor<*xf32> +} diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir index 9ea0a15f31185..c1beed95f2006 100644 --- a/mlir/test/Dialect/Tensor/bufferize.mlir +++ b/mlir/test/Dialect/Tensor/bufferize.mlir @@ -366,14 +366,10 @@ func.func @tensor.insert(%t1: tensor<5xf32>, %idx1: index, %f: f32) -> tensor<5x // ----- // CHECK-LABEL: func @tensor.expand_shape( -// CHECK-SAME: %[[t1:.*]]: tensor +// CHECK-SAME: %[[t1:.*]]: tensor, %[[sz0:.*]]: index func.func @tensor.expand_shape(%t1: tensor, %sz0: index) -> tensor<2x?x10xf32> { // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] - // CHECK: %[[C0:.*]] = arith.constant 0 : index - // CHECK: %[[DIM:.*]] = memref.dim %[[m1]], %[[C0]] : memref - // CHECK: %[[C2:.*]] = arith.constant 2 : index - // CHECK: %[[VAL_1:.*]] = arith.divsi %[[DIM]], %[[C2]] : index - // CHECK: %[[expanded:.*]] = memref.expand_shape %[[m1]] {{\[\[}}0, 1], [2]] output_shape [2, %[[VAL_1]], 10] : memref into memref<2x?x10xf32> + // CHECK: %[[expanded:.*]] = memref.expand_shape %[[m1]] {{\[\[}}0, 1], [2]] output_shape [2, %[[sz0]], 10] : memref into memref<2x?x10xf32> %0 = tensor.expand_shape %t1 [[0, 1], [2]] output_shape [2, %sz0, 10] : tensor into tensor<2x?x10xf32> @@ -385,23 +381,20 @@ func.func @tensor.expand_shape(%t1: tensor, %sz0: index) -> tensor<2x? 
// ----- // CHECK-LABEL: func @tensor.expand_shape_of_slice( -// CHECK-SAME: %[[t1:.*]]: tensor +// CHECK-SAME: %[[t1:.*]]: tensor, %{{.*}}: index, %{{.*}}: index, %[[sz0:.*]]: index func.func @tensor.expand_shape_of_slice( %t1: tensor, %o1: index, %s1: index, %sz0: index) -> tensor { // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : // CHECK: %[[subview:.*]] = memref.subview %[[m1]][%{{.*}}, 5] [%{{.*}}, 10] [1, 1] : memref to memref> %0 = tensor.extract_slice %t1[%o1, 5][%s1, 10][1, 1] : tensor to tensor - // CHECK: %[[C7:.*]] = arith.constant 7 : index - // CHECK: %[[VAL_1:.*]] = arith.divsi %{{.*}}, %[[C7]] : index - // CHECK: %[[expanded:.*]] = memref.expand_shape %[[subview]] {{\[\[}}0, 1], [2, 3]] output_shape [%[[VAL_1]], 7, 2, 5] : memref> into memref> + // CHECK: %[[expanded:.*]] = memref.expand_shape %[[subview]] {{\[\[}}0, 1], [2, 3]] output_shape [%[[sz0]], 7, 2, 5] : memref> into memref> %1 = tensor.expand_shape %0 [[0, 1], [2, 3]] output_shape [%sz0, 7, 2, 5] : tensor into tensor // CHECK: %[[r:.*]] = bufferization.to_tensor %[[expanded]] // CHECK: return %[[r]] return %1 : tensor } - // ----- // CHECK-LABEL: func @tensor.expand_shape_of_scalar_slice( @@ -417,7 +410,20 @@ func.func @tensor.expand_shape_of_scalar_slice( // CHECK: return %[[r]] return %1 : tensor<1xf32> } +// ----- +// CHECK-LABEL: func @tensor.expand_shape_multiple_dynamic_indices( +// CHECK-SAME: %[[t1:.*]]: tensor, %[[sz0:.*]]: index, %[[sz1:.*]]: index, %[[sz2:.*]]: index +func.func @tensor.expand_shape_multiple_dynamic_indices(%t1: tensor, %sz0: index, %sz1: index, %sz2: index) -> tensor { + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] + // CHECK: %[[expanded:.*]] = memref.expand_shape %[[m1]] {{\[\[}}0, 1, 2], [3]] output_shape [%[[sz0]], %[[sz1]], %[[sz2]], 256] : memref into memref + %0 = tensor.expand_shape %t1 [[0, 1, 2], [3]] output_shape [%sz0, %sz1, %sz2, 256] + : tensor into tensor + + // CHECK: %[[r:.*]] = bufferization.to_tensor %[[expanded]] + // 
CHECK: return %[[r]] + return %0 : tensor +} // ----- // CHECK-LABEL: func @tensor.collapse_shape( @@ -646,3 +652,6 @@ func.func @parallel_insert_slice_copy_before_write(%in: tensor<4xf32>, %out: ten // CHECK: } return } + +// ----- + diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index 123c65e1b4fcd..5b928a2489eea 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -168,7 +168,7 @@ func.func @test_depthwise_conv2d_acc_type(%arg0: tensor<1x4x4x4xi8>, %arg1: tens func.func @test_transpose_conv2d(%arg0: tensor<1x32x32x8xi8>, %arg1: tensor<16x1x1x8xi8>, %arg2: tensor<16xi8>) -> tensor<1x32x32x16xi8> { %zp = "tosa.const"() {value = dense<0> : tensor<1xi8>} : () -> tensor<1xi8> // expected-error@+1 {{'tosa.transpose_conv2d' op accumulator type for i8 tensor is not i32}} - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %zp, %zp {acc_type = f16, out_pad = array, out_shape = array, stride = array} : (tensor<1x32x32x8xi8>, tensor<16x1x1x8xi8>, tensor<16xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<1x32x32x16xi8> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %zp, %zp {acc_type = f16, out_pad = array, stride = array} : (tensor<1x32x32x8xi8>, tensor<16x1x1x8xi8>, tensor<16xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<1x32x32x16xi8> return %0 : tensor<1x32x32x16xi8> } @@ -741,15 +741,6 @@ func.func @test_table_io_shape_mismatch(%arg0: tensor, %arg1: tensor<6 // ----- -// CHECK-LABEL: test_transpose_conv2d_invalid_outshape -func.func @test_transpose_conv2d_invalid_outshape(%arg0: tensor<1x32x32x8xf32>, %arg1: tensor<16x1x1x8xf32>, %arg2: tensor<16xf32>) -> tensor<1x32x32x16xf32> { - // expected-error@+1 {{'tosa.transpose_conv2d' op attribute 'out_shape' failed to satisfy constraint: i64 dense array attribute with exactly 4 elements}} - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2 {out_pad = array, out_shape = array, stride = array} : (tensor<1x32x32x8xf32>, tensor<16x1x1x8xf32>, tensor<16xf32>) 
-> tensor<1x32x32x16xf32> - return %0 : tensor<1x32x32x16xf32> -} - -// ----- - // CHECK-LABEL: test_mul_type_mismatch func.func @test_mul_type_mismatch(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x1x3xf16>) -> tensor<13x21x3xf32> { %shift = "tosa.const"() {value = dense<0> : tensor<1xi8>} : () -> tensor<1xi8> diff --git a/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir b/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir index bb3c16cf52d63..0167bf10ed0ae 100644 --- a/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir +++ b/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir @@ -32,19 +32,16 @@ func.func @transpose_conv2d_quantized(%arg0: tensor<2x16x14x3xi8>, %arg1: tensor // CHECK-LABEL: @transpose_conv2d_quantized_padded func.func @transpose_conv2d_quantized_padded(%arg0: tensor<2x16x14x3xi8>, %arg1: tensor<5x3x6x3xi8>, %arg2: tensor<5xi32>) -> (tensor<2x21x26x5xi32>) { - // CHECK-DAG: %[[INPUT_ZP:.+]] = "tosa.const"() <{value = dense<-22> : tensor<1xi8>} - // CHECK-DAG: %[[WEIGHT_ZP:.+]] = "tosa.const"() <{value = dense<42> : tensor<1xi8>} - // CHECK-DAG: %[[REV0:.+]] = tosa.reverse %2 {axis = 2 : i32} - // CHECK-DAG: %[[REV1:.+]] = tosa.reverse %arg1 {axis = 1 : i32} - // CHECK: tosa.conv2d %arg0, %3, %arg2, %[[INPUT_ZP]], %[[WEIGHT_ZP]] - // CHECK-SAME: dilation = array, pad = array, - // CHECK-SAME: stride = array} - %input_zp = "tosa.const"() {value = dense<-22> : tensor<1xi8>} : () -> tensor<1xi8> - %weight_zp = "tosa.const"() {value = dense<42> : tensor<1xi8>} : () -> tensor<1xi8> + // CHECK-DAG: %[[INPUT_ZP:.+]] = "tosa.const"() <{value = dense<-22> : tensor<1xi8>}> : () -> tensor<1xi8> + // CHECK-DAG: %[[WEIGHT_ZP:.+]] = "tosa.const"() <{value = dense<42> : tensor<1xi8>}> : () -> tensor<1xi8> + // CHECK-DAG: %[[REV0:.+]] = tosa.reverse %arg1 {axis = 1 : i32} + // CHECK-DAG: %[[REV1:.+]] = tosa.reverse %[[REV0]] {axis = 2 : i32} + // CHECK: tosa.conv2d %arg0, %[[REV1]], %arg2, %[[INPUT_ZP]], %[[WEIGHT_ZP]] {acc_type = i32, 
dilation = array, pad = array, stride = array} + %input_zp = "tosa.const"() <{value = dense<-22> : tensor<1xi8>}> : () -> tensor<1xi8> + %weight_zp = "tosa.const"() <{value = dense<42> : tensor<1xi8>}> : () -> tensor<1xi8> %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %input_zp, %weight_zp { acc_type = i32, out_pad = array, - out_shape = array, stride = array} : (tensor<2x16x14x3xi8>, tensor<5x3x6x3xi8>, tensor<5xi32>, tensor<1xi8>, tensor<1xi8>) -> tensor<2x21x26x5xi32> return %0 : tensor<2x21x26x5xi32> } @@ -160,12 +157,11 @@ func.func @transpose_conv2d_strided_overpad(%arg0 : tensor<1x16x1x1xi8>, %arg1 : // CHECK: %[[PAD_RESULT:.+]] = tosa.pad %[[RESHAPE_RESULT_1]], %[[RESULT_PAD]] // CHECK: %[[RESHAPE_ARG2:.+]] = tosa.reshape %arg2, %[[CONST10]] // CHECK: %[[ADD:.+]] = tosa.add %[[PAD_RESULT]], %[[RESHAPE_ARG2]] - %input_zp = "tosa.const"() {value = dense<-103> : tensor<1xi8>} : () -> tensor<1xi8> - %weight_zp = "tosa.const"() {value = dense<93> : tensor<1xi8>} : () -> tensor<1xi8> + %input_zp = "tosa.const"() <{value = dense<-103> : tensor<1xi8>}> : () -> tensor<1xi8> + %weight_zp = "tosa.const"() <{value = dense<93> : tensor<1xi8>}> : () -> tensor<1xi8> %2 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %input_zp, %weight_zp { acc_type = i32, out_pad = array, - out_shape = array, stride = array} : (tensor<1x16x1x1xi8>, tensor<1x2x1x1xi8>, tensor<1xi32>, tensor<1xi8>, tensor<1xi8>) -> tensor<1x19x2x1xi32> "func.return" (%2) : (tensor<1x19x2x1xi32>) -> () diff --git a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir index b87e9a78bf144..8a3dbfe17d686 100644 --- a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir +++ b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir @@ -907,7 +907,7 @@ func.func @depthwise_conv2d_strided(%arg0: tensor<1x13x14x1xf32>, %arg1: tensor< // CHECK-LABEL: @transpose_conv2d_out_shape func.func @transpose_conv2d_out_shape(%arg0: tensor<2x?x?x3xf32>, %arg1: tensor<5x3x6x3xf32>, %arg2: tensor<5xf32>, 
%arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x8x9x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x?x?x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x8x9x5xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x?x?x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x8x9x5xf32> return } @@ -916,7 +916,7 @@ func.func @transpose_conv2d_out_shape(%arg0: tensor<2x?x?x3xf32>, %arg1: tensor< // CHECK-LABEL: @transpose_conv2d_static func.func @transpose_conv2d_static(%arg0: tensor<2x16x14x3xf32>, %arg1: tensor<5x3x6x3xf32>, %arg2: tensor<5xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x18x19x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x16x14x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x16x14x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> return } @@ -925,7 +925,7 @@ func.func @transpose_conv2d_static(%arg0: tensor<2x16x14x3xf32>, %arg1: tensor<5 // CHECK-LABEL: @transpose_conv2d_static_strided func.func @transpose_conv2d_static_strided(%arg0: tensor<2x16x14x3xf32>, %arg1: tensor<5x3x6x3xf32>, %arg2: tensor<5xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x33x45x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x16x14x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> + %0 = 
tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x16x14x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> return } @@ -934,7 +934,7 @@ func.func @transpose_conv2d_static_strided(%arg0: tensor<2x16x14x3xf32>, %arg1: // CHECK-LABEL: @transpose_conv2d_dynamic_input func.func @transpose_conv2d_dynamic_input(%arg0: tensor, %arg1: tensor<5x3x6x3xf32>, %arg2: tensor<5xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor return } @@ -943,7 +943,7 @@ func.func @transpose_conv2d_dynamic_input(%arg0: tensor, %arg1: ten // CHECK-LABEL: @transpose_conv2d_dynamic_weights func.func @transpose_conv2d_dynamic_weights(%arg0: tensor<2x6x4x3xf32>, %arg1: tensor, %arg2: tensor<5xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x?x?x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x6x4x3xf32>, tensor, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x6x4x3xf32>, tensor, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> return } @@ -952,7 +952,7 @@ func.func @transpose_conv2d_dynamic_weights(%arg0: tensor<2x6x4x3xf32>, %arg1: t // CHECK-LABEL: @transpose_conv2d_dynamic_bias func.func @transpose_conv2d_dynamic_bias(%arg0: tensor<2x6x4x3xf32>, %arg1: 
tensor<5x3x6x3xf32>, %arg2: tensor, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x8x9x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x6x4x3xf32>, tensor<5x3x6x3xf32>, tensor, tensor<1xf32>, tensor<1xf32>) -> tensor<2x8x9x5xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x6x4x3xf32>, tensor<5x3x6x3xf32>, tensor, tensor<1xf32>, tensor<1xf32>) -> tensor<2x8x9x5xf32> return } @@ -961,14 +961,14 @@ func.func @transpose_conv2d_dynamic_bias(%arg0: tensor<2x6x4x3xf32>, %arg1: tens // CHECK-LABEL: @transpose_conv2d_padded func.func @transpose_conv2d_padded(%arg0: tensor<2x9x11x3xf32>, %arg1: tensor<5x3x6x3xf32>, %arg2: tensor<5xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x10x13x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x9x11x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x10x13x5xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x9x11x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x10x13x5xf32> return } // CHECK-LABEL: @transpose_conv2d_strided func.func @transpose_conv2d_strided(%arg0: tensor<1x5x7x1xf32>, %arg1: tensor<1x1x1x1xf32>, %arg2: tensor<1xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<1x13x13x1xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<1x5x7x1xf32>, tensor<1x1x1x1xf32>, tensor<1xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x13x13x1xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = 
array} : (tensor<1x5x7x1xf32>, tensor<1x1x1x1xf32>, tensor<1xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x13x13x1xf32> return } diff --git a/mlir/test/Target/Cpp/control_flow.mlir b/mlir/test/Target/Cpp/control_flow.mlir index 436543f7ace95..101b30c2521c9 100644 --- a/mlir/test/Target/Cpp/control_flow.mlir +++ b/mlir/test/Target/Cpp/control_flow.mlir @@ -68,3 +68,22 @@ func.func @block_labels1() { // CPP-DECLTOP-NEXT: label2: // CPP-DECLTOP-NEXT: return; // CPP-DECLTOP-NEXT: } + +emitc.func @expression_inlining(%0 : i32, %1 : i32) { + %2 = expression : i1 { + %3 = cmp lt, %0, %1 : (i32, i32) -> i1 + yield %3 : i1 + } + cf.cond_br %2, ^bb1, ^bb1 + ^bb1: // 2 preds: ^bb0, ^bb0 + return +} +// CPP-DECLTOP: void expression_inlining(int32_t [[v1:v.*]], int32_t [[v2:v.*]]) { +// CPP-DECLTOP-NEXT: if ([[v1]] < [[v2]]) { +// CPP-DECLTOP-NEXT: goto label2; +// CPP-DECLTOP-NEXT: } else { +// CPP-DECLTOP-NEXT: goto label2; +// CPP-DECLTOP-NEXT: } +// CPP-DECLTOP-NEXT: label2: +// CPP-DECLTOP-NEXT: return; +// CPP-DECLTOP-NEXT: } diff --git a/mlir/test/Target/LLVMIR/Import/global-variables.ll b/mlir/test/Target/LLVMIR/Import/global-variables.ll index fbeda4cd42af8..b809c93d772f5 100644 --- a/mlir/test/Target/LLVMIR/Import/global-variables.ll +++ b/mlir/test/Target/LLVMIR/Import/global-variables.ll @@ -256,6 +256,14 @@ define void @bar() { ; // ----- +; CHECK: llvm.mlir.global_ctors {ctors = [], priorities = []} +@llvm.global_ctors = appending global [0 x { i32, ptr, ptr }] zeroinitializer + +; CHECK: llvm.mlir.global_dtors {dtors = [], priorities = []} +@llvm.global_dtors = appending global [0 x { i32, ptr, ptr }] zeroinitializer + +; // ----- + ; Visibility attribute. 
; CHECK: llvm.mlir.global external hidden constant @hidden("string") diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 7f9a3ba79d724..db2e08742dbca 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -1859,6 +1859,14 @@ llvm.func @foo() { // ----- +// CHECK: @llvm.global_ctors = appending global [0 x { i32, ptr, ptr }] zeroinitializer +llvm.mlir.global_ctors {ctors = [], priorities = []} + +// CHECK: @llvm.global_dtors = appending global [0 x { i32, ptr, ptr }] zeroinitializer +llvm.mlir.global_dtors {dtors = [], priorities = []} + +// ----- + // CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @foo, ptr null }] llvm.mlir.global_dtors { dtors = [@foo], priorities = [0 : i32]} diff --git a/mlir/unittests/IR/ShapedTypeTest.cpp b/mlir/unittests/IR/ShapedTypeTest.cpp index c2900b5aaeeeb..bc4066ed210e8 100644 --- a/mlir/unittests/IR/ShapedTypeTest.cpp +++ b/mlir/unittests/IR/ShapedTypeTest.cpp @@ -282,6 +282,20 @@ TEST(ShapedTypeTest, RankedTensorTypeView) { ASSERT_TRUE(mlir::isa(viewCreated)); view = mlir::cast(viewCreated); EXPECT_EQ(view.getName(), "bob"); + + // Verify encoding clone methods. 
+ EXPECT_EQ(unitEncodingRankedTensorType, + cast(noEncodingRankedTensorType) + .cloneWithEncoding(unitAttr)); + EXPECT_EQ(stringEncodingRankedTensorType, + cast(noEncodingRankedTensorType) + .cloneWithEncoding(stringAttr)); + EXPECT_EQ( + noEncodingRankedTensorType, + cast(unitEncodingRankedTensorType).dropEncoding()); + EXPECT_EQ( + noEncodingRankedTensorType, + cast(stringEncodingRankedTensorType).dropEncoding()); } } // namespace diff --git a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel index 010a617066c7b..4aa9d562124fe 100644 --- a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel @@ -241,6 +241,7 @@ cc_test( "//clang:lex", "//clang:sema", "//clang:serialization", + "//clang:tooling", "//llvm:Support", "//llvm:TargetParser", "//third-party/unittest:gmock", diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index b45145acc2cfe..6f5d381666e6d 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -5,6 +5,7 @@ # LLVM libc project. load("@bazel_skylib//lib:selects.bzl", "selects") load("@bazel_skylib//rules:common_settings.bzl", "string_flag") +load("@rules_python//python:defs.bzl", "py_binary") load( ":libc_build_rules.bzl", "libc_function", @@ -51,6 +52,15 @@ config_setting( flag_values = {":mpfr": "system"}, ) +########################### Header Generation ################################## + +py_binary( + name = "hdrgen", + srcs = glob(["utils/hdrgen/hdrgen/**/*.py"]), + imports = ["utils/hdrgen"], + main = "utils/hdrgen/hdrgen/main.py", +) + ################################## Base Config ################################# libc_support_library(